11 changes: 3 additions & 8 deletions engine/commands/model_get_cmd.cc
@@ -4,21 +4,16 @@
 #include <vector>
 #include "cmd_info.h"
 #include "config/yaml_config.h"
-#include "trantor/utils/Logger.h"
-#include "utils/cortex_utils.h"
 #include "utils/file_manager_utils.h"
 #include "utils/logging_utils.h"
 
 namespace commands {
 
-ModelGetCmd::ModelGetCmd(std::string model_handle)
-    : model_handle_(std::move(model_handle)) {}
-
-void ModelGetCmd::Exec() {
+void ModelGetCmd::Exec(const std::string& model_handle) {
   auto models_path = file_manager_utils::GetModelsContainerPath();
   if (std::filesystem::exists(models_path) &&
       std::filesystem::is_directory(models_path)) {
-    CmdInfo ci(model_handle_);
+    CmdInfo ci(model_handle);
     std::string model_file =
         ci.branch == "main" ? ci.model_name : ci.model_name + "-" + ci.branch;
     bool found_model = false;
@@ -149,4 +144,4 @@ void ModelGetCmd::Exec() {
     CLI_LOG("Model not found!");
   }
 }
-}; // namespace commands
+}; // namespace commands
11 changes: 2 additions & 9 deletions engine/commands/model_get_cmd.h
@@ -1,17 +1,10 @@
 #pragma once
 
-
-#include <cmath> // For std::isnan
 #include <string>
 namespace commands {
 
 class ModelGetCmd {
  public:
-
-  ModelGetCmd(std::string model_handle);
-  void Exec();
-
- private:
-  std::string model_handle_;
+  void Exec(const std::string& model_handle);
 };
-} // namespace commands
+} // namespace commands
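
The header change above makes ModelGetCmd stateless: the handle moves from constructor state into an Exec parameter. A minimal caller sketch, assuming the include path from this diff and a main() that is not part of the PR:

// Minimal sketch, not part of this PR: exercising the now-stateless command.
// The include path comes from the diff; the main() wiring is an assumption.
#include "commands/model_get_cmd.h"

int main(int argc, char** argv) {
  if (argc < 2) {
    return 1;  // a model handle is required
  }
  // No per-instance state: a temporary can serve any handle.
  commands::ModelGetCmd().Exec(argv[1]);
  return 0;
}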
7 changes: 1 addition & 6 deletions engine/commands/run_cmd.cc
@@ -2,13 +2,8 @@
 #include "chat_cmd.h"
 #include "cmd_info.h"
 #include "config/yaml_config.h"
-#include "engine_install_cmd.h"
-#include "httplib.h"
-#include "model_pull_cmd.h"
 #include "model_start_cmd.h"
 #include "server_start_cmd.h"
-#include "trantor/utils/Logger.h"
-#include "utils/cortex_utils.h"
 #include "utils/file_manager_utils.h"
 
 namespace commands {
@@ -46,7 +41,7 @@ void RunCmd::Exec() {
   if (!commands::IsServerAlive(host_, port_)) {
     CLI_LOG("Starting server ...");
     commands::ServerStartCmd ssc;
-    if(!ssc.Exec(host_, port_)) {
+    if (!ssc.Exec(host_, port_)) {
       return;
     }
   }
8 changes: 3 additions & 5 deletions engine/controllers/command_line_parser.cc
@@ -138,10 +138,8 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
       models_cmd->add_subcommand("get", "Get info of {model_id} locally");
   get_models_cmd->add_option("model_id", model_id, "");
   get_models_cmd->require_option();
-  get_models_cmd->callback([&model_id]() {
-    commands::ModelGetCmd command(model_id);
-    command.Exec();
-  });
+  get_models_cmd->callback(
+      [&model_id]() { commands::ModelGetCmd().Exec(model_id); });
 
   auto model_del_cmd =
       models_cmd->add_subcommand("delete", "Delete a model by ID locally");
@@ -238,7 +236,7 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
   auto ps_cmd =
       app_.add_subcommand("ps", "Show running models and their status");
   ps_cmd->group(kSystemGroup);
-
+
   CLI11_PARSE(app_, argc, argv);
   if (argc == 1) {
     CLI_LOG(app_.help());
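
The callback change above is the caller-side half of the stateless-command refactor. Since the parser uses CLI11 (see CLI11_PARSE), here is a self-contained sketch of the same subcommand-plus-callback pattern; the app name and the printout are stand-ins for the project's real command:

#include <iostream>
#include <string>
#include <CLI/CLI.hpp>

int main(int argc, char** argv) {
  CLI::App app{"sketch of the models get wiring"};
  std::string model_id;
  auto* get_cmd = app.add_subcommand("get", "Get info of {model_id} locally");
  get_cmd->add_option("model_id", model_id, "");
  get_cmd->callback([&model_id]() {
    // Stand-in for: commands::ModelGetCmd().Exec(model_id);
    std::cout << "get " << model_id << "\n";
  });
  CLI11_PARSE(app, argc, argv);
  return 0;
}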
12 changes: 12 additions & 0 deletions engine/e2e-test/test_cli_model_pull_cortexso_with_selection.py
@@ -0,0 +1,12 @@
+from test_runner import popen
+
+
+class TestCliModelPullCortexsoWithSelection:
+
+    def test_pull_model_from_cortexso_should_display_list_and_allow_user_to_choose(
+        self,
+    ):
+        stdout, stderr, return_code = popen(["pull", "tinyllama"], "1\n")
+
+        assert "Model tinyllama downloaded successfully!" in stdout
+        assert return_code == 0
15 changes: 8 additions & 7 deletions engine/e2e-test/test_cli_model_pull_direct_url.py
@@ -1,17 +1,18 @@
-import platform
-
 import pytest
 from test_runner import run
 
 
 class TestCliModelPullDirectUrl:
 
     @pytest.mark.skipif(True, reason="Expensive test. Only test when needed.")
     def test_model_pull_with_direct_url_should_be_success(self):
         exit_code, output, error = run(
-            "Pull model", ["pull", "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/blob/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"],
-            timeout=None
+            "Pull model",
+            [
+                "pull",
+                "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/blob/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf",
+            ],
+            timeout=None,
         )
         assert exit_code == 0, f"Model pull failed with error: {error}"
         # TODO: verify that the model has been pull successfully
-        # TODO: skip this test. since download model is taking too long
+        # TODO: skip this test. since download model is taking too long
11 changes: 5 additions & 6 deletions engine/e2e-test/test_cli_model_pull_from_cortexso.py
@@ -1,17 +1,16 @@
-import platform
-
 import pytest
 from test_runner import run
 
 
 class TestCliModelPullCortexso:
 
     @pytest.mark.skipif(True, reason="Expensive test. Only test when needed.")
     def test_model_pull_with_direct_url_should_be_success(self):
         exit_code, output, error = run(
-            "Pull model", ["pull", "tinyllama"],
-            timeout=None
+            "Pull model",
+            ["pull", "tinyllama"],
+            timeout=None,
        )
         assert exit_code == 0, f"Model pull failed with error: {error}"
         # TODO: verify that the model has been pull successfully
-        # TODO: skip this test. since download model is taking too long
+        # TODO: skip this test. since download model is taking too long
28 changes: 28 additions & 0 deletions engine/e2e-test/test_cli_model_pull_hugging_face_repository.py
@@ -0,0 +1,28 @@
+import pytest
+from test_runner import popen
+
+
+class TestCliModelPullHuggingFaceRepository:
+
+    def test_model_pull_hugging_face_repository(self):
+        """
+        Test pull model pervll/bge-reranker-v2-gemma-Q4_K_M-GGUF from issue #1017
+        """
+
+        stdout, stderr, return_code = popen(
+            ["pull", "pervll/bge-reranker-v2-gemma-Q4_K_M-GGUF"], "1\n"
+        )
+
+        assert "downloaded successfully!" in stdout
+        assert return_code == 0
+
+    def test_model_pull_hugging_face_not_gguf_should_failed_gracefully(self):
+        """
+        When pull a model which is not GGUF, we stop and show a message to user
+        """
+
+        stdout, stderr, return_code = popen(["pull", "BAAI/bge-reranker-v2-m3"], "")
+        assert (
+            "Not a GGUF model. Currently, only GGUF single file is supported." in stdout
+        )
+        assert return_code == 0
30 changes: 25 additions & 5 deletions engine/e2e-test/test_runner.py
@@ -38,6 +38,26 @@ def run(test_name: str, arguments: List[str], timeout=timeout) -> (int, str, str
     return result.returncode, result.stdout, result.stderr
 
 
+def popen(arguments: List[str], user_input: str) -> (int, str, str):
+    # Start the process
+    executable_path = getExecutablePath()
+    process = subprocess.Popen(
+        [executable_path] + arguments,
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True, # This ensures the input and output are treated as text
+    )
+
+    # Send input and get output
+    stdout, stderr = process.communicate(input=user_input)
+
+    # Get the return code
+    return_code = process.returncode
+
+    return stdout, stderr, return_code
+
+
 # Start the API server
 # Wait for `Server started` message or failed
 def start_server() -> bool:
@@ -50,10 +70,10 @@ def start_server() -> bool:
 def start_server_nix() -> bool:
     executable = getExecutablePath()
     process = subprocess.Popen(
-        [executable] + ['start', '-p', '3928'],
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True
+        [executable] + ["start", "-p", "3928"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
     )
 
     start_time = time.time()
@@ -80,7 +100,7 @@ def start_server_nix() -> bool:
 def start_server_windows() -> bool:
     executable = getExecutablePath()
     process = subprocess.Popen(
-        [executable] + ['start', '-p', '3928'],
+        [executable] + ["start", "-p", "3928"],
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         text=True,
98 changes: 77 additions & 21 deletions engine/services/model_service.cc
@@ -1,31 +1,63 @@
 #include "model_service.h"
 #include <filesystem>
 #include <iostream>
-#include "commands/cmd_info.h"
+#include <ostream>
+#include "utils/cli_selection_utils.h"
 #include "utils/cortexso_parser.h"
 #include "utils/file_manager_utils.h"
 #include "utils/huggingface_utils.h"
 #include "utils/logging_utils.h"
 #include "utils/model_callback_utils.h"
 #include "utils/url_parser.h"
+#include "utils/string_utils.h"
 
 void ModelService::DownloadModel(const std::string& input) {
   if (input.empty()) {
     throw std::runtime_error(
         "Input must be Cortex Model Hub handle or HuggingFace url!");
   }
 
-  // case input is a direct url
-  auto url_obj = url_parser::FromUrlString(input);
-  // TODO: handle case user paste url from cortexso
-  if (url_obj.protocol == "https") {
-    if (url_obj.host != kHuggingFaceHost) {
-      CLI_LOG("Only huggingface.co is supported for now");
-    }
-    return DownloadModelByDirectUrl(input);
-  } else {
-    commands::CmdInfo ci(input);
-    return DownloadModelFromCortexso(ci.model_name, ci.branch);
-  }
+  if (string_utils::StartsWith(input, "https://")) {
+    return DownloadModelByDirectUrl(input);
+  }
+
+  if (input.find("/") != std::string::npos) {
+    auto parsed = string_utils::SplitBy(input, "/");
+    if (parsed.size() != 2) {
+      throw std::runtime_error("Invalid model handle: " + input);
+    }
+
+    auto author = parsed[0];
+    auto model_name = parsed[1];
+    if (author == "cortexso") {
+      return DownloadModelByModelName(model_name);
+    }
+
+    DownloadHuggingFaceGgufModel(author, model_name, std::nullopt);
+    CLI_LOG("Model " << model_name << " downloaded successfully!")
+    return;
+  }
+
+  return DownloadModelByModelName(input);
+}
+
+void ModelService::DownloadModelByModelName(const std::string& modelName) {
+  try {
+    auto branches =
+        huggingface_utils::GetModelRepositoryBranches("cortexso", modelName);
+    std::vector<std::string> options{};
+    for (const auto& branch : branches) {
+      if (branch.name != "main") {
+        options.emplace_back(branch.name);
+      }
+    }
+    if (options.empty()) {
+      CLI_LOG("No variant found");
+      return;
+    }
+    auto selection = cli_selection_utils::PrintSelection(options);
+    DownloadModelFromCortexso(modelName, selection.value());
+  } catch (const std::runtime_error& e) {
+    CLI_LOG("Error downloading model, " << e.what());
+  }
 }
@@ -56,20 +88,14 @@ std::optional<config::ModelConfig> ModelService::GetDownloadedModel(
 }
 
 void ModelService::DownloadModelByDirectUrl(const std::string& url) {
-  // check for malformed url
-  // question: What if the url is from cortexso itself
-  // answer: then route to download from cortexso
   auto url_obj = url_parser::FromUrlString(url);
 
   if (url_obj.host == kHuggingFaceHost) {
-    // goto hugging face parser to normalize the url
-    // loop through path params, replace blob to resolve if any
     if (url_obj.pathParams[2] == "blob") {
       url_obj.pathParams[2] = "resolve";
     }
   }
 
-  // should separate this function out
+  auto model_id{url_obj.pathParams[1]};
   auto file_name{url_obj.pathParams.back()};
 
@@ -86,7 +112,7 @@ void ModelService::DownloadModelByDirectUrl(const std::string& url) {
 
   auto download_url = url_parser::FromUrl(url_obj);
   // this assume that the model being downloaded is a single gguf file
-  auto downloadTask{DownloadTask{.id = url_obj.pathParams.back(),
+  auto downloadTask{DownloadTask{.id = model_id,
                                  .type = DownloadType::Model,
                                  .items = {DownloadItem{
                                      .id = url_obj.pathParams.back(),
@@ -95,7 +121,7 @@ void ModelService::DownloadModelByDirectUrl(const std::string& url) {
                                  }}}};
 
   auto on_finished = [](const DownloadTask& finishedTask) {
-    std::cout << "Download success" << std::endl;
+    CLI_LOG("Model " << finishedTask.id << " downloaded successfully!")
     auto gguf_download_item = finishedTask.items[0];
     model_callback_utils::ParseGguf(gguf_download_item);
   };
@@ -109,8 +135,38 @@ void ModelService::DownloadModelFromCortexso(const std::string& name,
   if (downloadTask.has_value()) {
     DownloadService().AddDownloadTask(downloadTask.value(),
                                       model_callback_utils::DownloadModelCb);
-    CTL_INF("Download finished");
+    CLI_LOG("Model " << name << " downloaded successfully!")
   } else {
     CTL_ERR("Model not found");
   }
 }
+
+void ModelService::DownloadHuggingFaceGgufModel(
+    const std::string& author, const std::string& modelName,
+    std::optional<std::string> fileName) {
+  auto repo_info =
+      huggingface_utils::GetHuggingFaceModelRepoInfo(author, modelName);
+  if (!repo_info.has_value()) {
+    // throw is better?
+    CTL_ERR("Model not found");
+    return;
+  }
+
+  if (!repo_info->gguf.has_value()) {
+    throw std::runtime_error(
+        "Not a GGUF model. Currently, only GGUF single file is supported.");
+  }
+
+  std::vector<std::string> options{};
+  for (const auto& sibling : repo_info->siblings) {
+    if (string_utils::EndsWith(sibling.rfilename, ".gguf")) {
+      options.push_back(sibling.rfilename);
+    }
+  }
+  auto selection = cli_selection_utils::PrintSelection(options);
+  std::cout << "Selected: " << selection.value() << std::endl;
+
+  auto download_url = huggingface_utils::GetDownloadableUrl(author, modelName,
+                                                            selection.value());
+  DownloadModelByDirectUrl(download_url);
+}
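
Taken together, DownloadModel now dispatches on the shape of its input. A compact sketch of the three routes, reusing inputs that appear elsewhere in this PR; the header path and the default-constructed service are assumptions:

#include "services/model_service.h"  // path assumed from this diff

int main() {
  ModelService service;  // assumes a default-constructible service
  // 1) Direct URL: DownloadModelByDirectUrl, which rewrites a
  //    huggingface.co "blob" path segment to "resolve" before downloading.
  service.DownloadModel(
      "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/blob/"
      "main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf");
  // 2) "author/model": the HuggingFace GGUF flow, which lists .gguf siblings
  //    for interactive selection and throws for non-GGUF repositories.
  service.DownloadModel("pervll/bge-reranker-v2-gemma-Q4_K_M-GGUF");
  // 3) Bare handle (or "cortexso/<name>"): lists cortexso branches other
  //    than "main" and prompts for a variant.
  service.DownloadModel("tinyllama");
  return 0;
}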