11 changes: 3 additions & 8 deletions engine/commands/model_get_cmd.cc
@@ -4,21 +4,16 @@
 #include <vector>
 #include "cmd_info.h"
 #include "config/yaml_config.h"
-#include "trantor/utils/Logger.h"
-#include "utils/cortex_utils.h"
 #include "utils/file_manager_utils.h"
 #include "utils/logging_utils.h"
 
 namespace commands {
 
-ModelGetCmd::ModelGetCmd(std::string model_handle)
-    : model_handle_(std::move(model_handle)) {}
-
-void ModelGetCmd::Exec() {
+void ModelGetCmd::Exec(const std::string& model_handle) {
   auto models_path = file_manager_utils::GetModelsContainerPath();
   if (std::filesystem::exists(models_path) &&
       std::filesystem::is_directory(models_path)) {
-    CmdInfo ci(model_handle_);
+    CmdInfo ci(model_handle);
     std::string model_file =
         ci.branch == "main" ? ci.model_name : ci.model_name + "-" + ci.branch;
     bool found_model = false;
@@ -149,4 +144,4 @@ void ModelGetCmd::Exec() {
     CLI_LOG("Model not found!");
   }
 }
-}; // namespace commands
+}; // namespace commands
11 changes: 2 additions & 9 deletions engine/commands/model_get_cmd.h
@@ -1,17 +1,10 @@
 #pragma once
 
-
-#include <cmath> // For std::isnan
 #include <string>
 namespace commands {
 
 class ModelGetCmd {
  public:
-
-  ModelGetCmd(std::string model_handle);
-  void Exec();
-
- private:
-  std::string model_handle_;
+  void Exec(const std::string& model_handle);
 };
-} // namespace commands
+} // namespace commands
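
The header change above makes ModelGetCmd stateless: the handle moves from constructor state into an Exec parameter. A minimal caller sketch, assuming the include path from this diff and a main() that is not part of the PR:

// Minimal sketch, not part of this PR: exercising the now-stateless command.
// The include path comes from the diff; the main() wiring is an assumption.
#include "commands/model_get_cmd.h"

int main(int argc, char** argv) {
  if (argc < 2) {
    return 1;  // a model handle is required
  }
  // No per-instance state: a temporary can serve any handle.
  commands::ModelGetCmd().Exec(argv[1]);
  return 0;
}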
7 changes: 1 addition & 6 deletions engine/commands/run_cmd.cc
@@ -2,13 +2,8 @@
 #include "chat_cmd.h"
 #include "cmd_info.h"
 #include "config/yaml_config.h"
-#include "engine_install_cmd.h"
-#include "httplib.h"
-#include "model_pull_cmd.h"
 #include "model_start_cmd.h"
 #include "server_start_cmd.h"
-#include "trantor/utils/Logger.h"
-#include "utils/cortex_utils.h"
 #include "utils/file_manager_utils.h"
 
 namespace commands {
@@ -46,7 +41,7 @@ void RunCmd::Exec() {
   if (!commands::IsServerAlive(host_, port_)) {
     CLI_LOG("Starting server ...");
     commands::ServerStartCmd ssc;
-    if(!ssc.Exec(host_, port_)) {
+    if (!ssc.Exec(host_, port_)) {
       return;
     }
   }
8 changes: 3 additions & 5 deletions engine/controllers/command_line_parser.cc
@@ -138,10 +138,8 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
       models_cmd->add_subcommand("get", "Get info of {model_id} locally");
   get_models_cmd->add_option("model_id", model_id, "");
   get_models_cmd->require_option();
-  get_models_cmd->callback([&model_id]() {
-    commands::ModelGetCmd command(model_id);
-    command.Exec();
-  });
+  get_models_cmd->callback(
+      [&model_id]() { commands::ModelGetCmd().Exec(model_id); });
 
   auto model_del_cmd =
       models_cmd->add_subcommand("delete", "Delete a model by ID locally");
@@ -238,7 +236,7 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
   auto ps_cmd =
       app_.add_subcommand("ps", "Show running models and their status");
   ps_cmd->group(kSystemGroup);
-
+
   CLI11_PARSE(app_, argc, argv);
   if (argc == 1) {
     CLI_LOG(app_.help());
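
The callback change above is the caller-side half of the stateless-command refactor. Since the parser uses CLI11 (see CLI11_PARSE), here is a self-contained sketch of the same subcommand-plus-callback pattern; the app name and the printout are stand-ins for the project's real command:

#include <iostream>
#include <string>
#include <CLI/CLI.hpp>

int main(int argc, char** argv) {
  CLI::App app{"sketch of the models get wiring"};
  std::string model_id;
  auto* get_cmd = app.add_subcommand("get", "Get info of {model_id} locally");
  get_cmd->add_option("model_id", model_id, "");
  get_cmd->callback([&model_id]() {
    // Stand-in for: commands::ModelGetCmd().Exec(model_id);
    std::cout << "get " << model_id << "\n";
  });
  CLI11_PARSE(app, argc, argv);
  return 0;
}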
12 changes: 12 additions & 0 deletions engine/e2e-test/test_cli_model_pull_cortexso_with_selection.py
@@ -0,0 +1,12 @@
+from test_runner import popen
+
+
+class TestCliModelPullCortexsoWithSelection:
+
+    def test_pull_model_from_cortexso_should_display_list_and_allow_user_to_choose(
+        self,
+    ):
+        stdout, stderr, return_code = popen(["pull", "tinyllama"], "1\n")
+
+        assert "Model tinyllama downloaded successfully!" in stdout
+        assert return_code == 0
15 changes: 8 additions & 7 deletions engine/e2e-test/test_cli_model_pull_direct_url.py
@@ -1,17 +1,18 @@
-import platform
-
 import pytest
 from test_runner import run
 
 
 class TestCliModelPullDirectUrl:
 
     @pytest.mark.skipif(True, reason="Expensive test. Only test when needed.")
     def test_model_pull_with_direct_url_should_be_success(self):
         exit_code, output, error = run(
-            "Pull model", ["pull", "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/blob/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"],
-            timeout=None
+            "Pull model",
+            [
+                "pull",
+                "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/blob/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf",
+            ],
+            timeout=None,
         )
         assert exit_code == 0, f"Model pull failed with error: {error}"
         # TODO: verify that the model has been pull successfully
-        # TODO: skip this test. since download model is taking too long
+        # TODO: skip this test. since download model is taking too long
11 changes: 5 additions & 6 deletions engine/e2e-test/test_cli_model_pull_from_cortexso.py
@@ -1,17 +1,16 @@
-import platform
-
 import pytest
 from test_runner import run
 
 
 class TestCliModelPullCortexso:
 
     @pytest.mark.skipif(True, reason="Expensive test. Only test when needed.")
     def test_model_pull_with_direct_url_should_be_success(self):
         exit_code, output, error = run(
-            "Pull model", ["pull", "tinyllama"],
-            timeout=None
+            "Pull model",
+            ["pull", "tinyllama"],
+            timeout=None,
        )
         assert exit_code == 0, f"Model pull failed with error: {error}"
         # TODO: verify that the model has been pull successfully
-        # TODO: skip this test. since download model is taking too long
+        # TODO: skip this test. since download model is taking too long
28 changes: 28 additions & 0 deletions engine/e2e-test/test_cli_model_pull_hugging_face_repository.py
@@ -0,0 +1,28 @@
+import pytest
+from test_runner import popen
+
+
+class TestCliModelPullHuggingFaceRepository:
+
+    def test_model_pull_hugging_face_repository(self):
+        """
+        Test pull model pervll/bge-reranker-v2-gemma-Q4_K_M-GGUF from issue #1017
+        """
+
+        stdout, stderr, return_code = popen(
+            ["pull", "pervll/bge-reranker-v2-gemma-Q4_K_M-GGUF"], "1\n"
+        )
+
+        assert "downloaded successfully!" in stdout
+        assert return_code == 0
+
+    def test_model_pull_hugging_face_not_gguf_should_failed_gracefully(self):
+        """
+        When pull a model which is not GGUF, we stop and show a message to user
+        """
+
+        stdout, stderr, return_code = popen(["pull", "BAAI/bge-reranker-v2-m3"], "")
+        assert (
+            "Not a GGUF model. Currently, only GGUF single file is supported." in stdout
+        )
+        assert return_code == 0
30 changes: 25 additions & 5 deletions engine/e2e-test/test_runner.py
@@ -38,6 +38,26 @@ def run(test_name: str, arguments: List[str], timeout=timeout) -> (int, str, str
     return result.returncode, result.stdout, result.stderr
 
 
+def popen(arguments: List[str], user_input: str) -> (int, str, str):
+    # Start the process
+    executable_path = getExecutablePath()
+    process = subprocess.Popen(
+        [executable_path] + arguments,
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True, # This ensures the input and output are treated as text
+    )
+
+    # Send input and get output
+    stdout, stderr = process.communicate(input=user_input)
+
+    # Get the return code
+    return_code = process.returncode
+
+    return stdout, stderr, return_code
+
+
 # Start the API server
 # Wait for `Server started` message or failed
 def start_server() -> bool:
@@ -50,10 +70,10 @@ def start_server() -> bool:
 def start_server_nix() -> bool:
     executable = getExecutablePath()
     process = subprocess.Popen(
-        [executable] + ['start', '-p', '3928'],
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True
+        [executable] + ["start", "-p", "3928"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
     )
 
     start_time = time.time()
@@ -80,7 +100,7 @@ def start_server_nix() -> bool:
 def start_server_windows() -> bool:
     executable = getExecutablePath()
     process = subprocess.Popen(
-        [executable] + ['start', '-p', '3928'],
+        [executable] + ["start", "-p", "3928"],
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         text=True,
98 changes: 77 additions & 21 deletions engine/services/model_service.cc
@@ -1,31 +1,63 @@
 #include "model_service.h"
 #include <filesystem>
 #include <iostream>
-#include "commands/cmd_info.h"
+#include <ostream>
+#include "utils/cli_selection_utils.h"
 #include "utils/cortexso_parser.h"
 #include "utils/file_manager_utils.h"
 #include "utils/huggingface_utils.h"
 #include "utils/logging_utils.h"
 #include "utils/model_callback_utils.h"
 #include "utils/url_parser.h"
+#include "utils/string_utils.h"
 
 void ModelService::DownloadModel(const std::string& input) {
   if (input.empty()) {
     throw std::runtime_error(
         "Input must be Cortex Model Hub handle or HuggingFace url!");
   }
 
-  // case input is a direct url
-  auto url_obj = url_parser::FromUrlString(input);
-  // TODO: handle case user paste url from cortexso
-  if (url_obj.protocol == "https") {
-    if (url_obj.host != kHuggingFaceHost) {
-      CLI_LOG("Only huggingface.co is supported for now");
-    }
-    return DownloadModelByDirectUrl(input);
-  } else {
-    commands::CmdInfo ci(input);
-    return DownloadModelFromCortexso(ci.model_name, ci.branch);
-  }
+  if (string_utils::StartsWith(input, "https://")) {
+    return DownloadModelByDirectUrl(input);
+  }
+
+  if (input.find("/") != std::string::npos) {
+    auto parsed = string_utils::SplitBy(input, "/");
+    if (parsed.size() != 2) {
+      throw std::runtime_error("Invalid model handle: " + input);
+    }
+
+    auto author = parsed[0];
+    auto model_name = parsed[1];
+    if (author == "cortexso") {
+      return DownloadModelByModelName(model_name);
+    }
+
+    DownloadHuggingFaceGgufModel(author, model_name, std::nullopt);
+    CLI_LOG("Model " << model_name << " downloaded successfully!")
+    return;
+  }
+
+  return DownloadModelByModelName(input);
+}
+
+void ModelService::DownloadModelByModelName(const std::string& modelName) {
+  try {
+    auto branches =
+        huggingface_utils::GetModelRepositoryBranches("cortexso", modelName);
+    std::vector<std::string> options{};
+    for (const auto& branch : branches) {
+      if (branch.name != "main") {
+        options.emplace_back(branch.name);
+      }
+    }
+    if (options.empty()) {
+      CLI_LOG("No variant found");
+      return;
+    }
+    auto selection = cli_selection_utils::PrintSelection(options);
+    DownloadModelFromCortexso(modelName, selection.value());
+  } catch (const std::runtime_error& e) {
+    CLI_LOG("Error downloading model, " << e.what());
+  }
 }
@@ -56,20 +88,14 @@ std::optional<config::ModelConfig> ModelService::GetDownloadedModel(
 }
 
 void ModelService::DownloadModelByDirectUrl(const std::string& url) {
-  // check for malformed url
-  // question: What if the url is from cortexso itself
-  // answer: then route to download from cortexso
   auto url_obj = url_parser::FromUrlString(url);
 
   if (url_obj.host == kHuggingFaceHost) {
-    // goto hugging face parser to normalize the url
-    // loop through path params, replace blob to resolve if any
     if (url_obj.pathParams[2] == "blob") {
       url_obj.pathParams[2] = "resolve";
     }
   }
 
-  // should separate this function out
+  auto model_id{url_obj.pathParams[1]};
   auto file_name{url_obj.pathParams.back()};
 
@@ -86,7 +112,7 @@ void ModelService::DownloadModelByDirectUrl(const std::string& url) {
 
   auto download_url = url_parser::FromUrl(url_obj);
   // this assume that the model being downloaded is a single gguf file
-  auto downloadTask{DownloadTask{.id = url_obj.pathParams.back(),
+  auto downloadTask{DownloadTask{.id = model_id,
                                  .type = DownloadType::Model,
                                  .items = {DownloadItem{
                                      .id = url_obj.pathParams.back(),
@@ -95,7 +121,7 @@ void ModelService::DownloadModelByDirectUrl(const std::string& url) {
                                  }}}};
 
   auto on_finished = [](const DownloadTask& finishedTask) {
-    std::cout << "Download success" << std::endl;
+    CLI_LOG("Model " << finishedTask.id << " downloaded successfully!")
     auto gguf_download_item = finishedTask.items[0];
     model_callback_utils::ParseGguf(gguf_download_item);
   };
@@ -109,8 +135,38 @@ void ModelService::DownloadModelFromCortexso(const std::string& name,
   if (downloadTask.has_value()) {
     DownloadService().AddDownloadTask(downloadTask.value(),
                                       model_callback_utils::DownloadModelCb);
-    CTL_INF("Download finished");
+    CLI_LOG("Model " << name << " downloaded successfully!")
   } else {
     CTL_ERR("Model not found");
   }
 }
+
+void ModelService::DownloadHuggingFaceGgufModel(
+    const std::string& author, const std::string& modelName,
+    std::optional<std::string> fileName) {
+  auto repo_info =
+      huggingface_utils::GetHuggingFaceModelRepoInfo(author, modelName);
+  if (!repo_info.has_value()) {
+    // throw is better?
+    CTL_ERR("Model not found");
+    return;
+  }
+
+  if (!repo_info->gguf.has_value()) {
+    throw std::runtime_error(
+        "Not a GGUF model. Currently, only GGUF single file is supported.");
+  }
+
+  std::vector<std::string> options{};
+  for (const auto& sibling : repo_info->siblings) {
+    if (string_utils::EndsWith(sibling.rfilename, ".gguf")) {
+      options.push_back(sibling.rfilename);
+    }
+  }
+  auto selection = cli_selection_utils::PrintSelection(options);
+  std::cout << "Selected: " << selection.value() << std::endl;
+
+  auto download_url = huggingface_utils::GetDownloadableUrl(author, modelName,
+                                                            selection.value());
+  DownloadModelByDirectUrl(download_url);
+}
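
Taken together, DownloadModel now dispatches on the shape of its input. A compact sketch of the three routes, reusing inputs that appear elsewhere in this PR; the header path and the default-constructed service are assumptions:

#include "services/model_service.h"  // path assumed from this diff

int main() {
  ModelService service;  // assumes a default-constructible service
  // 1) Direct URL: DownloadModelByDirectUrl, which rewrites a
  //    huggingface.co "blob" path segment to "resolve" before downloading.
  service.DownloadModel(
      "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/blob/"
      "main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf");
  // 2) "author/model": the HuggingFace GGUF flow, which lists .gguf siblings
  //    for interactive selection and throws for non-GGUF repositories.
  service.DownloadModel("pervll/bge-reranker-v2-gemma-Q4_K_M-GGUF");
  // 3) Bare handle (or "cortexso/<name>"): lists cortexso branches other
  //    than "main" and prompts for a variant.
  service.DownloadModel("tinyllama");
  return 0;
}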