From 095327f1f03e93a702e98a7b0a70e0c13ba56176 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Tue, 1 Oct 2024 15:04:16 +0700 Subject: [PATCH 1/2] fix: correct model_id for chat --- engine/commands/chat_completion_cmd.cc | 14 ++- engine/commands/chat_completion_cmd.h | 4 +- engine/commands/model_start_cmd.cc | 75 ++----------- engine/commands/model_start_cmd.h | 2 - engine/commands/model_status_cmd.cc | 43 +------- engine/commands/model_status_cmd.h | 3 - engine/commands/model_stop_cmd.cc | 39 ++----- engine/commands/run_cmd.cc | 6 +- engine/controllers/models.cc | 52 ++++++++- engine/controllers/models.h | 8 ++ engine/services/model_service.cc | 145 ++++++++++++++++++++++++- engine/services/model_service.h | 9 ++ 12 files changed, 243 insertions(+), 157 deletions(-) diff --git a/engine/commands/chat_completion_cmd.cc b/engine/commands/chat_completion_cmd.cc index fb228f021..1ebaa8b1a 100644 --- a/engine/commands/chat_completion_cmd.cc +++ b/engine/commands/chat_completion_cmd.cc @@ -4,10 +4,11 @@ #include "cortex_upd_cmd.h" #include "database/models.h" #include "model_status_cmd.h" +#include "run_cmd.h" #include "server_start_cmd.h" #include "trantor/utils/Logger.h" #include "utils/logging_utils.h" -#include "run_cmd.h" +#include "config/yaml_config.h" namespace commands { namespace { @@ -39,7 +40,7 @@ struct ChunkParser { }; void ChatCompletionCmd::Exec(const std::string& host, int port, - const std::string& model_handle, std::string msg) { + const std::string& model_handle, std::string msg) { cortex::db::Models modellist_handler; config::YamlHandler yaml_handler; try { @@ -50,7 +51,7 @@ void ChatCompletionCmd::Exec(const std::string& host, int port, } yaml_handler.ModelConfigFromFile(model_entry.value().path_to_model_yaml); auto mc = yaml_handler.GetModelConfig(); - Exec(host, port, mc, std::move(msg)); + Exec(host, port, model_handle, mc, std::move(msg)); } catch (const std::exception& e) { CLI_LOG("Fail to start model information with ID '" + model_handle + "': " + e.what()); @@ -58,7 +59,8 @@ void ChatCompletionCmd::Exec(const std::string& host, int port, } void ChatCompletionCmd::Exec(const std::string& host, int port, - const config::ModelConfig& mc, std::string msg) { + const std::string& model_handle, + const config::ModelConfig& mc, std::string msg) { auto address = host + ":" + std::to_string(port); // Check if server is started { @@ -71,7 +73,7 @@ void ChatCompletionCmd::Exec(const std::string& host, int port, // Only check if llamacpp engine if ((mc.engine.find("llamacpp") != std::string::npos) && - !commands::ModelStatusCmd().IsLoaded(host, port, mc)) { + !commands::ModelStatusCmd().IsLoaded(host, port, model_handle)) { CLI_LOG("Model is not loaded yet!"); return; } @@ -104,7 +106,7 @@ void ChatCompletionCmd::Exec(const std::string& host, int port, histories_.push_back(std::move(new_data)); json_data["engine"] = mc.engine; json_data["messages"] = histories_; - json_data["model"] = mc.name; + json_data["model"] = model_handle; //TODO: support non-stream json_data["stream"] = true; json_data["stop"] = mc.stop; diff --git a/engine/commands/chat_completion_cmd.h b/engine/commands/chat_completion_cmd.h index bd488e91f..d962485bc 100644 --- a/engine/commands/chat_completion_cmd.h +++ b/engine/commands/chat_completion_cmd.h @@ -9,8 +9,8 @@ class ChatCompletionCmd { public: void Exec(const std::string& host, int port, const std::string& model_handle, std::string msg); - void Exec(const std::string& host, int port, const config::ModelConfig& mc, - std::string msg); + void Exec(const 
std::string& host, int port, const std::string& model_handle, + const config::ModelConfig& mc, std::string msg); private: std::vector histories_; diff --git a/engine/commands/model_start_cmd.cc b/engine/commands/model_start_cmd.cc index 2b0c8f2b9..dee1fe295 100644 --- a/engine/commands/model_start_cmd.cc +++ b/engine/commands/model_start_cmd.cc @@ -5,6 +5,7 @@ #include "model_status_cmd.h" #include "nlohmann/json.hpp" #include "server_start_cmd.h" +#include "services/model_service.h" #include "trantor/utils/Logger.h" #include "utils/file_manager_utils.h" #include "utils/logging_utils.h" @@ -12,77 +13,15 @@ namespace commands { bool ModelStartCmd::Exec(const std::string& host, int port, const std::string& model_handle) { + ModelService ms; + auto res = ms.StartModel(host, port, model_handle); - cortex::db::Models modellist_handler; - config::YamlHandler yaml_handler; - try { - auto model_entry = modellist_handler.GetModelInfo(model_handle); - if (model_entry.has_error()) { - CLI_LOG("Error: " + model_entry.error()); - return false; - } - yaml_handler.ModelConfigFromFile(model_entry.value().path_to_model_yaml); - auto mc = yaml_handler.GetModelConfig(); - return Exec(host, port, mc); - } catch (const std::exception& e) { - CLI_LOG("Fail to start model information with ID '" + model_handle + - "': " + e.what()); + if (res.has_error()) { + CLI_LOG("Error: " + res.error()); return false; } -} - -bool ModelStartCmd::Exec(const std::string& host, int port, - const config::ModelConfig& mc) { - // Check if server is started - if (!commands::IsServerAlive(host, port)) { - CLI_LOG("Server is not started yet, please run `" - << commands::GetCortexBinary() << " start` to start server!"); - return false; - } - - // Only check for llamacpp for now - if ((mc.engine.find("llamacpp") != std::string::npos) && - commands::ModelStatusCmd().IsLoaded(host, port, mc)) { - CLI_LOG("Model has already been started!"); - return true; - } - - httplib::Client cli(host + ":" + std::to_string(port)); - - nlohmann::json json_data; - if (mc.files.size() > 0) { - // TODO(sang) support multiple files - json_data["model_path"] = mc.files[0]; - } else { - LOG_WARN << "model_path is empty"; - return false; - } - json_data["model"] = mc.name; - json_data["system_prompt"] = mc.system_template; - json_data["user_prompt"] = mc.user_template; - json_data["ai_prompt"] = mc.ai_template; - json_data["ctx_len"] = mc.ctx_len; - json_data["stop"] = mc.stop; - json_data["engine"] = mc.engine; - - auto data_str = json_data.dump(); - cli.set_read_timeout(std::chrono::seconds(60)); - auto res = cli.Post("/inferences/server/loadmodel", httplib::Headers(), - data_str.data(), data_str.size(), "application/json"); - if (res) { - if (res->status == httplib::StatusCode::OK_200) { - CLI_LOG("Model loaded!"); - return true; - } else { - CTL_ERR("Model failed to load with status code: " << res->status); - return false; - } - } else { - auto err = res.error(); - CTL_ERR("HTTP error: " << httplib::to_string(err)); - return false; - } - return false; + CLI_LOG("Model loaded!"); + return true; } }; // namespace commands diff --git a/engine/commands/model_start_cmd.h b/engine/commands/model_start_cmd.h index fbf3c0645..cd039c02b 100644 --- a/engine/commands/model_start_cmd.h +++ b/engine/commands/model_start_cmd.h @@ -1,6 +1,5 @@ #pragma once #include -#include "config/model_config.h" namespace commands { @@ -8,6 +7,5 @@ class ModelStartCmd { public: bool Exec(const std::string& host, int port, const std::string& model_handle); - bool Exec(const 
std::string& host, int port, const config::ModelConfig& mc); }; } // namespace commands diff --git a/engine/commands/model_status_cmd.cc b/engine/commands/model_status_cmd.cc index 38ff17bdc..ffa6cfcb5 100644 --- a/engine/commands/model_status_cmd.cc +++ b/engine/commands/model_status_cmd.cc @@ -4,49 +4,18 @@ #include "httplib.h" #include "nlohmann/json.hpp" #include "utils/logging_utils.h" +#include "services/model_service.h" namespace commands { bool ModelStatusCmd::IsLoaded(const std::string& host, int port, const std::string& model_handle) { - cortex::db::Models modellist_handler; - config::YamlHandler yaml_handler; - try { - auto model_entry = modellist_handler.GetModelInfo(model_handle); - if (model_entry.has_error()) { - CLI_LOG("Error: " + model_entry.error()); - return false; - } - yaml_handler.ModelConfigFromFile(model_entry.value().path_to_model_yaml); - auto mc = yaml_handler.GetModelConfig(); - return IsLoaded(host, port, mc); - } catch (const std::exception& e) { - CLI_LOG("Fail to get model status with ID '" + model_handle + - "': " + e.what()); - return false; - } -} + ModelService ms; + auto res = ms.GetModelStatus(host, port, model_handle); -bool ModelStatusCmd::IsLoaded(const std::string& host, int port, - const config::ModelConfig& mc) { - httplib::Client cli(host + ":" + std::to_string(port)); - nlohmann::json json_data; - json_data["model"] = mc.name; - json_data["engine"] = mc.engine; - - auto data_str = json_data.dump(); - - auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(), - data_str.data(), data_str.size(), "application/json"); - if (res) { - if (res->status == httplib::StatusCode::OK_200) { - return true; - } - } else { - auto err = res.error(); - CTL_WRN("HTTP error: " << httplib::to_string(err)); + if (res.has_error()) { + // CLI_LOG("Error: " + res.error()); return false; } - - return false; + return true; } } // namespace commands \ No newline at end of file diff --git a/engine/commands/model_status_cmd.h b/engine/commands/model_status_cmd.h index 273d73ef9..9a9e90b95 100644 --- a/engine/commands/model_status_cmd.h +++ b/engine/commands/model_status_cmd.h @@ -1,6 +1,5 @@ #pragma once #include -#include "config/yaml_config.h" namespace commands { @@ -8,7 +7,5 @@ class ModelStatusCmd { public: bool IsLoaded(const std::string& host, int port, const std::string& model_handle); - bool IsLoaded(const std::string& host, int port, - const config::ModelConfig& mc); }; } // namespace commands \ No newline at end of file diff --git a/engine/commands/model_stop_cmd.cc b/engine/commands/model_stop_cmd.cc index 86dd37010..41500bfd1 100644 --- a/engine/commands/model_stop_cmd.cc +++ b/engine/commands/model_stop_cmd.cc @@ -5,45 +5,20 @@ #include "nlohmann/json.hpp" #include "utils/file_manager_utils.h" #include "utils/logging_utils.h" +#include "services/model_service.h" namespace commands { void ModelStopCmd::Exec(const std::string& host, int port, const std::string& model_handle) { - cortex::db::Models modellist_handler; - config::YamlHandler yaml_handler; - try { - auto model_entry = modellist_handler.GetModelInfo(model_handle); - if (model_entry.has_error()) { - CLI_LOG("Error: " + model_entry.error()); - return; - } - yaml_handler.ModelConfigFromFile(model_entry.value().path_to_model_yaml); - auto mc = yaml_handler.GetModelConfig(); - httplib::Client cli(host + ":" + std::to_string(port)); - nlohmann::json json_data; - json_data["model"] = mc.name; - json_data["engine"] = mc.engine; + ModelService ms; + auto res = ms.StopModel(host, port, 
model_handle); - auto data_str = json_data.dump(); - - auto res = cli.Post("/inferences/server/unloadmodel", httplib::Headers(), - data_str.data(), data_str.size(), "application/json"); - if (res) { - if (res->status == httplib::StatusCode::OK_200) { - // LOG_INFO << res->body; - CLI_LOG("Model unloaded!"); - } else { - CLI_LOG("Error: could not unload model - " << res->status); - } - } else { - auto err = res.error(); - CTL_ERR("HTTP error: " << httplib::to_string(err)); - } - } catch (const std::exception& e) { - CLI_LOG("Fail to stop model information with ID '" + model_handle + - "': " + e.what()); + if (res.has_error()) { + CLI_LOG("Error: " + res.error()); + return; } + CLI_LOG("Model unloaded!"); } }; // namespace commands diff --git a/engine/commands/run_cmd.cc b/engine/commands/run_cmd.cc index 2fff4c285..0d2b56455 100644 --- a/engine/commands/run_cmd.cc +++ b/engine/commands/run_cmd.cc @@ -72,8 +72,8 @@ void RunCmd::Exec(bool chat_flag) { // If it is llamacpp, then check model status first { if ((mc.engine.find("llamacpp") == std::string::npos) || - !commands::ModelStatusCmd().IsLoaded(host_, port_, mc)) { - if (!ModelStartCmd().Exec(host_, port_, mc)) { + !commands::ModelStatusCmd().IsLoaded(host_, port_, model_handle_)) { + if (!ModelStartCmd().Exec(host_, port_, model_handle_)) { return; } } @@ -81,7 +81,7 @@ void RunCmd::Exec(bool chat_flag) { // Chat if (chat_flag) { - ChatCompletionCmd().Exec(host_, port_, mc, ""); + ChatCompletionCmd().Exec(host_, port_, model_handle_, mc, ""); } else { CLI_LOG(*model_id << " model started successfully. Use `" << commands::GetCortexBinary() << " chat " << *model_id diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index 6ac0c1664..0b3754aab 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -226,9 +226,8 @@ void Models::ImportModel( std::filesystem::path("imported") / std::filesystem::path(modelHandle + ".yml")) .string(); - cortex::db::ModelEntry model_entry{ - modelHandle, "local", "imported", - model_yaml_path, modelHandle}; + cortex::db::ModelEntry model_entry{modelHandle, "local", "imported", + model_yaml_path, modelHandle}; try { std::filesystem::create_directories( std::filesystem::path(model_yaml_path).parent_path()); @@ -331,3 +330,50 @@ void Models::SetModelAlias( callback(resp); } } + +void Models::StartModel( + const HttpRequestPtr& req, + std::function&& callback) { + if (!http_util::HasFieldInReq(req, callback, "model")) + return; + auto config = file_manager_utils::GetCortexConfig(); + auto model_handle = (*(req->getJsonObject())).get("model", "").asString(); + auto result = model_service_.StartModel( + config.apiServerHost, std::stoi(config.apiServerPort), model_handle); + if (result.has_error()) { + Json::Value ret; + ret["message"] = result.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(drogon::k400BadRequest); + callback(resp); + } else { + Json::Value ret; + ret["message"] = "Started successfully!"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); + } +} + +void Models::StopModel(const HttpRequestPtr& req, + std::function&& callback) { + if (!http_util::HasFieldInReq(req, callback, "model")) + return; + auto config = file_manager_utils::GetCortexConfig(); + auto model_handle = (*(req->getJsonObject())).get("model", "").asString(); + auto result = model_service_.StopModel( + config.apiServerHost, std::stoi(config.apiServerPort), model_handle); + if 
(result.has_error()) {
+    Json::Value ret;
+    ret["message"] = result.error();
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(drogon::k400BadRequest);
+    callback(resp);
+  } else {
+    Json::Value ret;
+    ret["message"] = "Stopped successfully!";
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k200OK);
+    callback(resp);
+  }
+}
\ No newline at end of file
diff --git a/engine/controllers/models.h b/engine/controllers/models.h
index 9c67ff7dc..41511ebc7 100644
--- a/engine/controllers/models.h
+++ b/engine/controllers/models.h
@@ -18,6 +18,8 @@ class Models : public drogon::HttpController {
   METHOD_ADD(Models::ImportModel, "/import", Post);
   METHOD_ADD(Models::DeleteModel, "/{1}", Delete);
   METHOD_ADD(Models::SetModelAlias, "/alias", Post);
+  METHOD_ADD(Models::StartModel, "/start", Post);
+  METHOD_ADD(Models::StopModel, "/stop", Post);
   METHOD_LIST_END

   void PullModel(const HttpRequestPtr& req,
@@ -39,6 +41,12 @@ class Models : public drogon::HttpController {
       const HttpRequestPtr& req,
       std::function&& callback) const;

+  void StartModel(const HttpRequestPtr& req,
+                  std::function&& callback);
+
+  void StopModel(const HttpRequestPtr& req,
+                 std::function&& callback);
+
  private:
  ModelService model_service_;
 };
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index 344c9506a..4944db752 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -5,6 +5,7 @@
 #include "config/gguf_parser.h"
 #include "config/yaml_config.h"
 #include "database/models.h"
+#include "httplib.h"
 #include "utils/cli_selection_utils.h"
 #include "utils/file_manager_utils.h"
 #include "utils/huggingface_utils.h"
@@ -338,7 +339,7 @@ cpp::result ModelService::DeleteModel(
   try {
     auto model_entry = modellist_handler.GetModelInfo(model_handle);
     if (model_entry.has_error()) {
-      CLI_LOG("Error: " + model_entry.error());
+      CTL_WRN("Error: " + model_entry.error());
       return cpp::fail(model_entry.error());
     }
     yaml_handler.ModelConfigFromFile(model_entry.value().path_to_model_yaml);
@@ -373,3 +374,145 @@ cpp::result ModelService::DeleteModel(
                      "': " + e.what());
   }
 }
+
+cpp::result ModelService::StartModel(
+    const std::string& host, int port, const std::string& model_handle) {
+
+  cortex::db::Models modellist_handler;
+  config::YamlHandler yaml_handler;
+
+  try {
+    auto model_entry = modellist_handler.GetModelInfo(model_handle);
+    if (model_entry.has_error()) {
+      CTL_WRN("Error: " + model_entry.error());
+      return cpp::fail(model_entry.error());
+    }
+    yaml_handler.ModelConfigFromFile(model_entry.value().path_to_model_yaml);
+    auto mc = yaml_handler.GetModelConfig();
+
+    httplib::Client cli(host + ":" + std::to_string(port));
+
+    Json::Value json_data = mc.ToJson();
+    if (mc.files.size() > 0) {
+      // TODO(sang) support multiple files
+      json_data["model_path"] = mc.files[0];
+    } else {
+      LOG_WARN << "model_path is empty";
+      return cpp::fail("Model path is empty");
+    }
+    json_data["model"] = model_handle;
+    json_data["system_prompt"] = mc.system_template;
+    json_data["user_prompt"] = mc.user_template;
+    json_data["ai_prompt"] = mc.ai_template;
+
+    auto data_str = json_data.toStyledString();
+    CTL_INF(data_str);
+    cli.set_read_timeout(std::chrono::seconds(60));
+    auto res = cli.Post("/inferences/server/loadmodel", httplib::Headers(),
+                        data_str.data(), data_str.size(), "application/json");
+    if (res) {
+      if (res->status == httplib::StatusCode::OK_200) {
+        return true;
+      } else {
+        CTL_ERR("Model failed to load with status code: " << res->status);
+        return cpp::fail("Model failed to load with status code: " +
+                         std::to_string(res->status));
+      }
+    } else {
+      auto err = res.error();
+      CTL_ERR("HTTP error: " << httplib::to_string(err));
+      return cpp::fail("HTTP error: " + httplib::to_string(err));
+    }
+
+  } catch (const std::exception& e) {
+    return cpp::fail("Failed to load model with ID '" + model_handle +
+                     "': " + e.what());
+  }
+}
+
+cpp::result ModelService::StopModel(
+    const std::string& host, int port, const std::string& model_handle) {
+  cortex::db::Models modellist_handler;
+  config::YamlHandler yaml_handler;
+
+  try {
+    auto model_entry = modellist_handler.GetModelInfo(model_handle);
+    if (model_entry.has_error()) {
+      CTL_WRN("Error: " + model_entry.error());
+      return cpp::fail(model_entry.error());
+    }
+    yaml_handler.ModelConfigFromFile(model_entry.value().path_to_model_yaml);
+    auto mc = yaml_handler.GetModelConfig();
+
+    httplib::Client cli(host + ":" + std::to_string(port));
+
+    Json::Value json_data;
+    json_data["model"] = model_handle;
+    json_data["engine"] = mc.engine;
+    auto data_str = json_data.toStyledString();
+    CTL_INF(data_str);
+    cli.set_read_timeout(std::chrono::seconds(60));
+    auto res = cli.Post("/inferences/server/unloadmodel", httplib::Headers(),
+                        data_str.data(), data_str.size(), "application/json");
+    if (res) {
+      if (res->status == httplib::StatusCode::OK_200) {
+        return true;
+      } else {
+        CTL_ERR("Model failed to unload with status code: " << res->status);
+        return cpp::fail("Model failed to unload with status code: " +
+                         std::to_string(res->status));
+      }
+    } else {
+      auto err = res.error();
+      CTL_ERR("HTTP error: " << httplib::to_string(err));
+      return cpp::fail("HTTP error: " + httplib::to_string(err));
+    }
+
+  } catch (const std::exception& e) {
+    return cpp::fail("Failed to unload model with ID '" + model_handle +
+                     "': " + e.what());
+  }
+}
+
+cpp::result ModelService::GetModelStatus(
+    const std::string& host, int port, const std::string& model_handle) {
+  cortex::db::Models modellist_handler;
+  config::YamlHandler yaml_handler;
+
+  try {
+    auto model_entry = modellist_handler.GetModelInfo(model_handle);
+    if (model_entry.has_error()) {
+      CTL_WRN("Error: " + model_entry.error());
+      return cpp::fail(model_entry.error());
+    }
+    yaml_handler.ModelConfigFromFile(model_entry.value().path_to_model_yaml);
+    auto mc = yaml_handler.GetModelConfig();
+
+    httplib::Client cli(host + ":" + std::to_string(port));
+    nlohmann::json json_data;
+    json_data["model"] = model_handle;
+    json_data["engine"] = mc.engine;
+
+    auto data_str = json_data.dump();
+
+    auto res = cli.Post("/inferences/server/modelstatus", httplib::Headers(),
+                        data_str.data(), data_str.size(), "application/json");
+    if (res) {
+      if (res->status == httplib::StatusCode::OK_200) {
+        return true;
+      } else {
+        CTL_INF("Failed to get model status with status code: "
+                << res->status);
+        return cpp::fail("Failed to get model status with status code: " +
+                         std::to_string(res->status));
+      }
+    } else {
+      auto err = res.error();
+      CTL_WRN("HTTP error: " << httplib::to_string(err));
+      return cpp::fail("HTTP error: " + httplib::to_string(err));
+    }
+  } catch (const std::exception& e) {
+    return cpp::fail("Failed to get model status with ID '" + model_handle +
+                     "': " + e.what());
+  }
+}
\ No newline at end of file
diff --git a/engine/services/model_service.h b/engine/services/model_service.h
index 6ddc00d7c..433fb6177 100644
--- a/engine/services/model_service.h
+++ b/engine/services/model_service.h
@@ -29,6 +29,15 @@ class ModelService {
    */
  cpp::result DeleteModel(const std::string& 
model_handle); + cpp::result StartModel(const std::string& host, int port, + const std::string& model_handle); + + cpp::result StopModel(const std::string& host, int port, + const std::string& model_handle); + + cpp::result GetModelStatus( + const std::string& host, int port, const std::string& model_handle); + cpp::result HandleUrl(const std::string& url, bool async = false); From f37a485f85d38abf70f630b9a57263e4f3edbfb3 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Tue, 1 Oct 2024 15:33:13 +0700 Subject: [PATCH 2/2] fix: fallback to cortex.llamacpp if does not have engine field --- engine/controllers/server.cc | 54 ++++++++++++++++++++++++------------ engine/controllers/server.h | 2 ++ 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index f19b1412e..7dfc589ef 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -26,12 +26,14 @@ server::~server() {} void server::ChatCompletion( const HttpRequestPtr& req, std::function&& callback) { - if (!HasFieldInReq(req, callback, "engine")) { - return; + std::string engine_type; + if (!HasFieldInReq(req, "engine")) { + engine_type = kLlamaEngine; + } else { + engine_type = + (*(req->getJsonObject())).get("engine", kLlamaEngine).asString(); } - auto engine_type = - (*(req->getJsonObject())).get("engine", cur_engine_type_).asString(); if (!IsEngineLoaded(engine_type)) { Json::Value res; res["message"] = "Engine is not loaded yet"; @@ -89,12 +91,14 @@ void server::Embedding(const HttpRequestPtr& req, void server::UnloadModel( const HttpRequestPtr& req, std::function&& callback) { - if (!HasFieldInReq(req, callback, "engine")) { - return; + std::string engine_type; + if (!HasFieldInReq(req, "engine")) { + engine_type = kLlamaEngine; + } else { + engine_type = + (*(req->getJsonObject())).get("engine", kLlamaEngine).asString(); } - auto engine_type = - (*(req->getJsonObject())).get("engine", cur_engine_type_).asString(); if (!IsEngineLoaded(engine_type)) { Json::Value res; res["message"] = "Engine is not loaded yet"; @@ -120,12 +124,14 @@ void server::UnloadModel( void server::ModelStatus( const HttpRequestPtr& req, std::function&& callback) { - if (!HasFieldInReq(req, callback, "engine")) { - return; + std::string engine_type; + if (!HasFieldInReq(req, "engine")) { + engine_type = kLlamaEngine; + } else { + engine_type = + (*(req->getJsonObject())).get("engine", kLlamaEngine).asString(); } - auto engine_type = - (*(req->getJsonObject())).get("engine", cur_engine_type_).asString(); if (!IsEngineLoaded(engine_type)) { Json::Value res; res["message"] = "Engine is not loaded yet"; @@ -347,8 +353,10 @@ void server::LoadModel(const HttpRequestPtr& req, if (engine_type == kLlamaEngine) { //fix for llamacpp engine first auto config = file_manager_utils::GetCortexConfig(); if (en->IsSupported("SetFileLogger")) { - en->SetFileLogger(config.maxLogLines, (std::filesystem::path(config.logFolderPath) / - std::filesystem::path(config.logLlamaCppPath)).string()); + en->SetFileLogger(config.maxLogLines, + (std::filesystem::path(config.logFolderPath) / + std::filesystem::path(config.logLlamaCppPath)) + .string()); } else { LOG_WARN << "Method SetFileLogger is not supported yet"; } @@ -371,12 +379,14 @@ void server::LoadModel(const HttpRequestPtr& req, void server::UnloadEngine( const HttpRequestPtr& req, std::function&& callback) { - if (!HasFieldInReq(req, callback, "engine")) { - return; + std::string engine_type; + if (!HasFieldInReq(req, "engine")) { + engine_type 
= kLlamaEngine; + } else { + engine_type = + (*(req->getJsonObject())).get("engine", kLlamaEngine).asString(); } - auto engine_type = - (*(req->getJsonObject())).get("engine", cur_engine_type_).asString(); if (!IsEngineLoaded(engine_type)) { Json::Value res; res["message"] = "Engine is not loaded yet"; @@ -468,4 +478,12 @@ bool server::HasFieldInReq( return true; } +bool server::HasFieldInReq(const HttpRequestPtr& req, + const std::string& field) { + if (auto o = req->getJsonObject(); !o || (*o)[field].isNull()) { + return false; + } + return true; +} + } // namespace inferences diff --git a/engine/controllers/server.h b/engine/controllers/server.h index 58615517d..623825481 100644 --- a/engine/controllers/server.h +++ b/engine/controllers/server.h @@ -107,6 +107,8 @@ class server : public drogon::HttpController, std::function& callback, const std::string& field); + bool HasFieldInReq(const HttpRequestPtr& req, const std::string& field); + private: struct SyncQueue { void push(std::pair&& p) {