diff --git a/engine/commands/chat_completion_cmd.cc b/engine/commands/chat_completion_cmd.cc
index 5a92c4b95..8d06ce4bc 100644
--- a/engine/commands/chat_completion_cmd.cc
+++ b/engine/commands/chat_completion_cmd.cc
@@ -77,7 +77,8 @@ void ChatCompletionCmd::Exec(const std::string& host, int port,
   }

   // Only check if llamacpp engine
-  if ((mc.engine.find("llamacpp") != std::string::npos) &&
+  if ((mc.engine.find(kLlamaEngine) != std::string::npos ||
+       mc.engine.find(kLlamaRepo) != std::string::npos) &&
       !commands::ModelStatusCmd().IsLoaded(host, port, model_handle)) {
     CLI_LOG("Model is not loaded yet!");
     return;
diff --git a/engine/commands/ps_cmd.cc b/engine/commands/ps_cmd.cc
index 2b2c8dc8d..5d5392565 100644
--- a/engine/commands/ps_cmd.cc
+++ b/engine/commands/ps_cmd.cc
@@ -4,6 +4,7 @@
 #include
 #include
 #include "nlohmann/json.hpp"
+#include "utils/engine_constants.h"
 #include "utils/format_utils.h"
 #include "utils/logging_utils.h"
 #include "utils/string_utils.h"
@@ -26,7 +27,8 @@ void PsCmd::Exec(const std::string& host, int port) {
   try {
     for (const auto& item : data) {
       ModelLoadedStatus model_status;
-      model_status.engine = item["engine"];
+      // TODO(sang) hardcode for now
+      model_status.engine = kLlamaEngine;
       model_status.model = item["id"];
       model_status.ram = item["ram"];
       model_status.start_time = item["start_time"];
diff --git a/engine/commands/run_cmd.cc b/engine/commands/run_cmd.cc
index 1bf85afa6..b4023cfd3 100644
--- a/engine/commands/run_cmd.cc
+++ b/engine/commands/run_cmd.cc
@@ -11,6 +11,19 @@

 namespace commands {

+namespace {
+std::string Repo2Engine(const std::string& r) {
+  if (r == kLlamaRepo) {
+    return kLlamaEngine;
+  } else if (r == kOnnxRepo) {
+    return kOnnxEngine;
+  } else if (r == kTrtLlmRepo) {
+    return kTrtLlmEngine;
+  }
+  return r;
+};
+}  // namespace
+
 void RunCmd::Exec(bool chat_flag) {
   std::optional<std::string> model_id = model_handle_;

@@ -47,7 +60,9 @@ void RunCmd::Exec(bool chat_flag) {

     // Check if engine existed. If not, download it
     {
-      auto required_engine = engine_service_.GetEngineInfo(mc.engine);
+      auto required_engine =
+          engine_service_.GetEngineInfo(Repo2Engine(mc.engine));
+
       if (!required_engine.has_value()) {
         throw std::runtime_error("Engine not found: " + mc.engine);
       }
diff --git a/engine/config/gguf_parser.cc b/engine/config/gguf_parser.cc
index 3d7cd53df..3324077c3 100644
--- a/engine/config/gguf_parser.cc
+++ b/engine/config/gguf_parser.cc
@@ -25,6 +25,7 @@
 #include "gguf_parser.h"
 #include "trantor/utils/Logger.h"
+#include "utils/engine_constants.h"
 namespace config {
 #define NOMINMAX
@@ -401,7 +402,7 @@ void GGUFHandler::ModelConfigFromMetadata() {
   model_config_.frequency_penalty = 0;
   model_config_.presence_penalty = 0;
   model_config_.stream = true;
-  model_config_.engine = "cortex.llamacpp";
+  model_config_.engine = kLlamaEngine;
   model_config_.created = std::time(nullptr);
   model_config_.model = "model";
   model_config_.owned_by = "";
diff --git a/engine/config/yaml_config.cc b/engine/config/yaml_config.cc
index bfefd2d7f..8bd34c109 100644
--- a/engine/config/yaml_config.cc
+++ b/engine/config/yaml_config.cc
@@ -4,8 +4,9 @@
 #include
 #include
-#include "utils/format_utils.h"
+#include "utils/engine_constants.h"
 #include "utils/file_manager_utils.h"
+#include "utils/format_utils.h"
 #include "yaml_config.h"
 namespace config {
 // Method to read YAML file
@@ -25,10 +26,12 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {
       std::replace(s.begin(), s.end(), '\\', '/');
       std::vector<std::string> v;
       if (yaml_node_["engine"] &&
-          yaml_node_["engine"].as<std::string>() == "cortex.llamacpp") {
+          (yaml_node_["engine"].as<std::string>() == kLlamaRepo ||
+           (yaml_node_["engine"].as<std::string>() == kLlamaEngine))) {
         auto abs_path = s.substr(0, s.find_last_of('/')) + "/model.gguf";
         auto rel_path = fmu::ToRelativeCortexDataPath(fs::path(abs_path));
         v.emplace_back(rel_path.string());
+
       } else {
         v.emplace_back(s.substr(0, s.find_last_of('/')));
       }
@@ -289,7 +292,8 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const {
     outFile << "version: " << yaml_node_["version"].as<std::string>() << "\n";
   }
   if (yaml_node_["files"] && yaml_node_["files"].size()) {
-    outFile << "files: # Can be relative OR absolute local file path\n";
+    outFile << "files: # Can be relative OR absolute local file "
+               "path\n";
     for (const auto& source : yaml_node_["files"]) {
       outFile << " - " << source << "\n";
     }
diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc
index 7dfc589ef..ffbf2cef3 100644
--- a/engine/controllers/server.cc
+++ b/engine/controllers/server.cc
@@ -3,18 +3,25 @@
 #include "trantor/utils/Logger.h"
 #include "utils/cortex_utils.h"
 #include "utils/cpuid/cpu_info.h"
+#include "utils/engine_constants.h"
 #include "utils/file_manager_utils.h"
 using namespace inferences;
 using json = nlohmann::json;
 namespace inferences {
 namespace {
-constexpr static auto kLlamaEngine = "cortex.llamacpp";
-constexpr static auto kPythonRuntimeEngine = "cortex.python";
-constexpr static auto kOnnxEngine = "cortex.onnx";
-constexpr static auto kTensorrtLlmEngine = "cortex.tensorrt-llm";
+// Need to change this after we rename repositories
+std::string NormalizeEngine(const std::string& engine) {
+  if (engine == kLlamaEngine) {
+    return kLlamaRepo;
+  } else if (engine == kOnnxEngine) {
+    return kOnnxRepo;
+  } else if (engine == kTrtLlmEngine) {
+    return kTrtLlmRepo;
+  }
+  return engine;
+};
 }  // namespace
-
 server::server() {
 #if defined(_WIN32)
   SetDefaultDllDirectories(LOAD_LIBRARY_SEARCH_DEFAULT_DIRS);
@@ -28,13 +35,15 @@ void server::ChatCompletion(
     std::function<void(const HttpResponsePtr&)>&& callback) {
   std::string engine_type;
   if (!HasFieldInReq(req, "engine")) {
-    engine_type = kLlamaEngine;
+    engine_type = kLlamaRepo;
   } else {
     engine_type =
-        (*(req->getJsonObject())).get("engine", kLlamaEngine).asString();
+        (*(req->getJsonObject())).get("engine", kLlamaRepo).asString();
   }

-  if (!IsEngineLoaded(engine_type)) {
+  auto ne = NormalizeEngine(engine_type);
+
+  if (!IsEngineLoaded(ne)) {
     Json::Value res;
     res["message"] = "Engine is not loaded yet";
     auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
@@ -48,7 +57,7 @@ void server::ChatCompletion(
   auto json_body = req->getJsonObject();
   bool is_stream = (*json_body).get("stream", false).asBool();
   auto q = std::make_shared<SyncQueue>();
-  std::get<EngineI*>(engines_[engine_type].engine)
+  std::get<EngineI*>(engines_[ne].engine)
       ->HandleChatCompletion(json_body,
                              [q](Json::Value status, Json::Value res) {
                                q->push(std::make_pair(status, res));
@@ -65,8 +74,9 @@ void server::ChatCompletion(
 void server::Embedding(const HttpRequestPtr& req,
                        std::function<void(const HttpResponsePtr&)>&& callback) {
   auto engine_type =
-      (*(req->getJsonObject())).get("engine", kLlamaEngine).asString();
-  if (!IsEngineLoaded(engine_type)) {
+      (*(req->getJsonObject())).get("engine", kLlamaRepo).asString();
+  auto ne = NormalizeEngine(engine_type);
+  if (!IsEngineLoaded(ne)) {
     Json::Value res;
     res["message"] = "Engine is not loaded yet";
     auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
@@ -78,7 +88,7 @@ void server::Embedding(const HttpRequestPtr& req,
   LOG_TRACE << "Start embedding";
   SyncQueue q;
-  std::get<EngineI*>(engines_[engine_type].engine)
+  std::get<EngineI*>(engines_[ne].engine)
       ->HandleEmbedding(req->getJsonObject(),
                         [&q](Json::Value status, Json::Value res) {
                           q.push(std::make_pair(status, res));
@@ -93,13 +103,14 @@ void server::UnloadModel(
     std::function<void(const HttpResponsePtr&)>&& callback) {
   std::string engine_type;
   if (!HasFieldInReq(req, "engine")) {
-    engine_type = kLlamaEngine;
+    engine_type = kLlamaRepo;
   } else {
     engine_type =
-        (*(req->getJsonObject())).get("engine", kLlamaEngine).asString();
+        (*(req->getJsonObject())).get("engine", kLlamaRepo).asString();
   }
+  auto ne = NormalizeEngine(engine_type);

-  if (!IsEngineLoaded(engine_type)) {
+  if (!IsEngineLoaded(ne)) {
     Json::Value res;
     res["message"] = "Engine is not loaded yet";
     auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
@@ -109,7 +120,7 @@ void server::UnloadModel(
     return;
   }
   LOG_TRACE << "Start unload model";
-  std::get<EngineI*>(engines_[engine_type].engine)
+  std::get<EngineI*>(engines_[ne].engine)
       ->UnloadModel(
           req->getJsonObject(),
           [cb = std::move(callback)](Json::Value status, Json::Value res) {
@@ -126,13 +137,15 @@ void server::ModelStatus(
     std::function<void(const HttpResponsePtr&)>&& callback) {
   std::string engine_type;
   if (!HasFieldInReq(req, "engine")) {
-    engine_type = kLlamaEngine;
+    engine_type = kLlamaRepo;
   } else {
     engine_type =
-        (*(req->getJsonObject())).get("engine", kLlamaEngine).asString();
+        (*(req->getJsonObject())).get("engine", kLlamaRepo).asString();
   }

-  if (!IsEngineLoaded(engine_type)) {
+  auto ne = NormalizeEngine(engine_type);
+
+  if (!IsEngineLoaded(ne)) {
     Json::Value res;
     res["message"] = "Engine is not loaded yet";
     auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
@@ -143,7 +156,7 @@ void server::ModelStatus(
   }

   LOG_TRACE << "Start to get model status";
-  std::get<EngineI*>(engines_[engine_type].engine)
+  std::get<EngineI*>(engines_[ne].engine)
       ->GetModelStatus(
           req->getJsonObject(),
           [cb = std::move(callback)](Json::Value status, Json::Value res) {
@@ -213,7 +226,7 @@ void server::FineTuning(
     const HttpRequestPtr& req,
     std::function<void(const HttpResponsePtr&)>&& callback) {
   auto engine_type =
-      (*(req->getJsonObject())).get("engine", kPythonRuntimeEngine).asString();
+      (*(req->getJsonObject())).get("engine", kPythonRuntimeRepo).asString();

   if (engines_.find(engine_type) == engines_.end()) {
     try {
@@ -267,23 +280,25 @@ void server::FineTuning(
 void server::LoadModel(const HttpRequestPtr& req,
                        std::function<void(const HttpResponsePtr&)>&& callback) {
   auto engine_type =
-      (*(req->getJsonObject())).get("engine", kLlamaEngine).asString();
+      (*(req->getJsonObject())).get("engine", kLlamaRepo).asString();
+
+  auto ne = NormalizeEngine(engine_type);

   // We have not loaded engine yet, should load it before using it
-  if (engines_.find(engine_type) == engines_.end()) {
+  if (engines_.find(ne) == engines_.end()) {
     auto get_engine_path = [](std::string_view e) {
-      if (e == kLlamaEngine) {
+      if (e == kLlamaRepo) {
         return cortex_utils::kLlamaLibPath;
-      } else if (e == kOnnxEngine) {
+      } else if (e == kOnnxRepo) {
         return cortex_utils::kOnnxLibPath;
-      } else if (e == kTensorrtLlmEngine) {
+      } else if (e == kTrtLlmRepo) {
         return cortex_utils::kTensorrtLlmPath;
       }
       return cortex_utils::kLlamaLibPath;
     };

     try {
-      if (engine_type == kLlamaEngine) {
+      if (ne == kLlamaRepo) {
         cortex::cpuid::CpuInfo cpu_info;
         LOG_INFO << "CPU instruction set: " << cpu_info.to_string();
       }
@@ -292,7 +307,7 @@ void server::LoadModel(const HttpRequestPtr& req,
           (getenv("ENGINE_PATH")
                ? getenv("ENGINE_PATH")
                : file_manager_utils::GetCortexDataPath().string()) +
-          get_engine_path(engine_type);
+          get_engine_path(ne);
 #if defined(_WIN32)
       // TODO(?) If we only allow to load an engine at a time, the logic is simpler.
       // We would like to support running multiple engines at the same time. Therefore,
@@ -313,28 +328,26 @@ void server::LoadModel(const HttpRequestPtr& req,
         }
       };

-      if (IsEngineLoaded(kLlamaEngine) && engine_type == kTensorrtLlmEngine) {
+      if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo) {
         // Remove llamacpp dll directory
-        if (!RemoveDllDirectory(engines_[kLlamaEngine].cookie)) {
-          LOG_INFO << "Could not remove dll directory: " << kLlamaEngine;
+        if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) {
+          LOG_INFO << "Could not remove dll directory: " << kLlamaRepo;
         } else {
-          LOG_WARN << "Removed dll directory: " << kLlamaEngine;
+          LOG_WARN << "Removed dll directory: " << kLlamaRepo;
         }

-        add_dll(engine_type, abs_path);
-      } else if (IsEngineLoaded(kTensorrtLlmEngine) &&
-                 engine_type == kLlamaEngine) {
+        add_dll(ne, abs_path);
+      } else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) {
         // Do nothing
       } else {
-        add_dll(engine_type, abs_path);
+        add_dll(ne, abs_path);
       }
 #endif
-      engines_[engine_type].dl =
-          std::make_unique<cortex_cpp::dylib>(abs_path, "engine");
+      engines_[ne].dl = std::make_unique<cortex_cpp::dylib>(abs_path, "engine");
     } catch (const cortex_cpp::dylib::load_error& e) {
       LOG_ERROR << "Could not load engine: " << e.what();
-      engines_.erase(engine_type);
+      engines_.erase(ne);

       Json::Value res;
       res["message"] = "Could not load engine " + engine_type;
@@ -343,14 +356,13 @@ void server::LoadModel(const HttpRequestPtr& req,
       callback(resp);
       return;
     }
-    cur_engine_type_ = engine_type;
+    cur_engine_type_ = ne;

-    auto func =
-        engines_[engine_type].dl->get_function<EngineI*()>("get_engine");
-    engines_[engine_type].engine = func();
+    auto func = engines_[ne].dl->get_function<EngineI*()>("get_engine");
+    engines_[ne].engine = func();

-    auto& en = std::get<EngineI*>(engines_[engine_type].engine);
-    if (engine_type == kLlamaEngine) {  //fix for llamacpp engine first
+    auto& en = std::get<EngineI*>(engines_[ne].engine);
+    if (ne == kLlamaRepo) {  //fix for llamacpp engine first
       auto config = file_manager_utils::GetCortexConfig();
       if (en->IsSupported("SetFileLogger")) {
         en->SetFileLogger(config.maxLogLines,
@@ -365,7 +377,7 @@ void server::LoadModel(const HttpRequestPtr& req,
   }

   LOG_TRACE << "Load model";
-  auto& en = std::get<EngineI*>(engines_[engine_type].engine);
+  auto& en = std::get<EngineI*>(engines_[ne].engine);
   en->LoadModel(req->getJsonObject(), [cb = std::move(callback)](
                                           Json::Value status, Json::Value res) {
     auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
@@ -381,13 +393,15 @@ void server::UnloadEngine(
     std::function<void(const HttpResponsePtr&)>&& callback) {
   std::string engine_type;
   if (!HasFieldInReq(req, "engine")) {
-    engine_type = kLlamaEngine;
+    engine_type = kLlamaRepo;
   } else {
     engine_type =
-        (*(req->getJsonObject())).get("engine", kLlamaEngine).asString();
+        (*(req->getJsonObject())).get("engine", kLlamaRepo).asString();
   }

-  if (!IsEngineLoaded(engine_type)) {
+  auto ne = NormalizeEngine(engine_type);
+
+  if (!IsEngineLoaded(ne)) {
     Json::Value res;
     res["message"] = "Engine is not loaded yet";
     auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
@@ -397,16 +411,16 @@ void server::UnloadEngine(
     return;
   }

-  EngineI* e = std::get<EngineI*>(engines_[engine_type].engine);
+  EngineI* e = std::get<EngineI*>(engines_[ne].engine);
   delete e;
 #if defined(_WIN32)
-  if (!RemoveDllDirectory(engines_[engine_type].cookie)) {
+  if (!RemoveDllDirectory(engines_[ne].cookie)) {
     LOG_WARN << "Could not remove dll directory: " << engine_type;
   } else {
     LOG_INFO << "Removed dll directory: " << engine_type;
   }
 #endif
-  engines_.erase(engine_type);
+  engines_.erase(ne);
   LOG_INFO << "Unloaded engine " + engine_type;
   Json::Value res;
   res["message"] = "Unloaded engine " + engine_type;
diff --git a/engine/e2e-test/test_api_engine_get.py b/engine/e2e-test/test_api_engine_get.py
index 6627c7926..baa9c8037 100644
--- a/engine/e2e-test/test_api_engine_get.py
+++ b/engine/e2e-test/test_api_engine_get.py
@@ -18,5 +18,5 @@ def setup_and_teardown(self):
         stop_server()

     def test_engines_get_llamacpp_should_be_successful(self):
-        response = requests.get("http://localhost:3928/engines/cortex.llamacpp")
+        response = requests.get("http://localhost:3928/engines/llama-cpp")
         assert response.status_code == 200
diff --git a/engine/e2e-test/test_api_engine_install.py b/engine/e2e-test/test_api_engine_install.py
index fbc7c6639..749b45dd3 100644
--- a/engine/e2e-test/test_api_engine_install.py
+++ b/engine/e2e-test/test_api_engine_install.py
@@ -18,5 +18,5 @@ def setup_and_teardown(self):
         stop_server()

     def test_engines_install_llamacpp_should_be_successful(self):
-        response = requests.post("http://localhost:3928/engines/install/cortex.llamacpp")
+        response = requests.post("http://localhost:3928/engines/install/llama-cpp")
         assert response.status_code == 200
diff --git a/engine/e2e-test/test_api_engine_uninstall.py b/engine/e2e-test/test_api_engine_uninstall.py
index be6223df6..c171be8ee 100644
--- a/engine/e2e-test/test_api_engine_uninstall.py
+++ b/engine/e2e-test/test_api_engine_uninstall.py
@@ -18,5 +18,5 @@ def setup_and_teardown(self):
         stop_server()

     def test_engines_uninstall_llamacpp_should_be_successful(self):
-        response = requests.delete("http://localhost:3928/engines/cortex.llamacpp")
+        response = requests.delete("http://localhost:3928/engines/llama-cpp")
         assert response.status_code == 200
diff --git a/engine/e2e-test/test_cli_engine_get.py b/engine/e2e-test/test_cli_engine_get.py
index 6b5270eba..dd3dffbde 100644
--- a/engine/e2e-test/test_cli_engine_get.py
+++ b/engine/e2e-test/test_cli_engine_get.py
@@ -9,49 +9,49 @@ class TestCliEngineGet:
     @pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test")
     def test_engines_get_tensorrt_llm_should_not_be_incompatible(self):
         exit_code, output, error = run(
-            "Get engine", ["engines", "get", "cortex.tensorrt-llm"]
+            "Get engine", ["engines", "get", "tensorrt-llm"]
         )
         assert exit_code == 0, f"Get engine failed with error: {error}"
         assert (
             "Incompatible" not in output
-        ), "cortex.tensorrt-llm should be Ready or Not Installed on Windows"
+        ), "tensorrt-llm should be Ready or Not Installed on Windows"

     @pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test")
     def test_engines_get_onnx_should_not_be_incompatible(self):
-        exit_code, output, error = run("Get engine", ["engines", "get", "cortex.onnx"])
+        exit_code, output, error = run("Get engine", ["engines", "get", "onnxruntime"])
         assert exit_code == 0, f"Get engine failed with error: {error}"
         assert (
             "Incompatible" not in output
-        ), "cortex.onnx should be Ready or Not Installed on Windows"
+        ), "onnxruntime should be Ready or Not Installed on Windows"

     def test_engines_get_llamacpp_should_not_be_incompatible(self):
         exit_code, output, error = run(
-            "Get engine", ["engines", "get", "cortex.llamacpp"]
+            "Get engine", ["engines", "get", "llama-cpp"]
         )
         assert exit_code == 0, f"Get engine failed with error: {error}"
         assert (
             "Incompatible" not in output
-        ), "cortex.llamacpp should be compatible for Windows, MacOs and Linux"
+        ), "llama-cpp should be compatible for Windows, MacOs and Linux"

     @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test")
     def test_engines_get_tensorrt_llm_should_be_incompatible_on_macos(self):
         exit_code, output, error = run(
-            "Get engine", ["engines", "get", "cortex.tensorrt-llm"]
+            "Get engine", ["engines", "get", "tensorrt-llm"]
         )
         assert exit_code == 0, f"Get engine failed with error: {error}"
         assert (
             "Incompatible" in output
-        ), "cortex.tensorrt-llm should be Incompatible on MacOS"
+        ), "tensorrt-llm should be Incompatible on MacOS"

     @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test")
     def test_engines_get_onnx_should_be_incompatible_on_macos(self):
-        exit_code, output, error = run("Get engine", ["engines", "get", "cortex.onnx"])
+        exit_code, output, error = run("Get engine", ["engines", "get", "onnxruntime"])
         assert exit_code == 0, f"Get engine failed with error: {error}"
-        assert "Incompatible" in output, "cortex.onnx should be Incompatible on MacOS"
+        assert "Incompatible" in output, "onnxruntime should be Incompatible on MacOS"

     @pytest.mark.skipif(platform.system() != "Linux", reason="Linux-specific test")
     def test_engines_get_onnx_should_be_incompatible_on_linux(self):
-        exit_code, output, error = run("Get engine", ["engines", "get", "cortex.onnx"])
+        exit_code, output, error = run("Get engine", ["engines", "get", "onnxruntime"])
         print(output)
         assert exit_code == 0, f"Get engine failed with error: {error}"
-        assert "Incompatible" in output, "cortex.onnx should be Incompatible on Linux"
+        assert "Incompatible" in output, "onnxruntime should be Incompatible on Linux"
diff --git a/engine/e2e-test/test_cli_engine_install.py b/engine/e2e-test/test_cli_engine_install.py
index a3142c455..b4c27f3ef 100644
--- a/engine/e2e-test/test_cli_engine_install.py
+++ b/engine/e2e-test/test_cli_engine_install.py
@@ -9,7 +9,7 @@ class TestCliEngineInstall:
     def test_engines_install_llamacpp_should_be_successfully(self):
         exit_code, output, error = run(
-            "Install Engine", ["engines", "install", "cortex.llamacpp"], timeout=None
+            "Install Engine", ["engines", "install", "llama-cpp"], timeout=None
         )
         assert "Start downloading" in output, "Should display downloading message"
         assert exit_code == 0, f"Install engine failed with error: {error}"
@@ -17,7 +17,7 @@ def test_engines_install_llamacpp_should_be_successfully(self):
     @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test")
     def test_engines_install_onnx_on_macos_should_be_failed(self):
         exit_code, output, error = run(
-            "Install Engine", ["engines", "install", "cortex.onnx"]
+            "Install Engine", ["engines", "install", "onnxruntime"]
         )
         assert "No variant found" in output, "Should display error message"
         assert exit_code == 0, f"Install engine failed with error: {error}"
@@ -25,28 +25,28 @@ def test_engines_install_onnx_on_macos_should_be_failed(self):
     @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test")
     def test_engines_install_onnx_on_tensorrt_should_be_failed(self):
         exit_code, output, error = run(
-            "Install Engine", ["engines", "install", "cortex.tensorrt-llm"]
+            "Install Engine", ["engines", "install", "tensorrt-llm"]
         )
         assert "No variant found" in output, "Should display error message"
         assert exit_code == 0, f"Install engine failed with error: {error}"

     def test_engines_install_pre_release_llamacpp(self):
         exit_code, output, error = run(
-            "Install Engine", ["engines", "install", "cortex.llamacpp", "-v", "v0.1.29"], timeout=600
+            "Install Engine", ["engines", "install", "llama-cpp", "-v", "v0.1.29"], timeout=600
         )
         assert "Start downloading" in output, "Should display downloading message"
         assert exit_code == 0, f"Install engine failed with error: {error}"

     def test_engines_should_fallback_to_download_llamacpp_engine_if_not_exists(self):
         exit_code, output, error = run(
-            "Install Engine", ["engines", "install", "cortex.llamacpp", "-s", tempfile.gettempdir()], timeout=None
+            "Install Engine", ["engines", "install", "llama-cpp", "-s", tempfile.gettempdir()], timeout=None
         )
         assert "Start downloading" in output, "Should display downloading message"
         assert exit_code == 0, f"Install engine failed with error: {error}"

     def test_engines_should_not_perform_with_dummy_path(self):
         exit_code, output, error = run(
-            "Install Engine", ["engines", "install", "cortex.llamacpp", "-s", "abcpod"], timeout=None
+            "Install Engine", ["engines", "install", "llama-cpp", "-s", "abcpod"], timeout=None
         )
         assert "Folder does not exist" in output, "Should display error"
         assert exit_code == 0, f"Install engine failed with error: {error}"
diff --git a/engine/e2e-test/test_cli_engine_list.py b/engine/e2e-test/test_cli_engine_list.py
index 38faa75d0..10f7470be 100644
--- a/engine/e2e-test/test_cli_engine_list.py
+++ b/engine/e2e-test/test_cli_engine_list.py
@@ -9,16 +9,16 @@ class TestCliEngineList:
     def test_engines_list_run_successfully_on_windows(self):
         exit_code, output, error = run("List engines", ["engines", "list"])
         assert exit_code == 0, f"List engines failed with error: {error}"
-        assert "llama.cpp" in output
+        assert "llama-cpp" in output

     @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test")
     def test_engines_list_run_successfully_on_macos(self):
         exit_code, output, error = run("List engines", ["engines", "list"])
         assert exit_code == 0, f"List engines failed with error: {error}"
-        assert "llama.cpp" in output
+        assert "llama-cpp" in output

     @pytest.mark.skipif(platform.system() != "Linux", reason="Linux-specific test")
     def test_engines_list_run_successfully_on_linux(self):
         exit_code, output, error = run("List engines", ["engines", "list"])
         assert exit_code == 0, f"List engines failed with error: {error}"
-        assert "llama.cpp" in output
\ No newline at end of file
+        assert "llama-cpp" in output
\ No newline at end of file
diff --git a/engine/e2e-test/test_cli_engine_uninstall.py b/engine/e2e-test/test_cli_engine_uninstall.py
index 685e5387f..c53b6f922 100644
--- a/engine/e2e-test/test_cli_engine_uninstall.py
+++ b/engine/e2e-test/test_cli_engine_uninstall.py
@@ -8,17 +8,17 @@ class TestCliEngineUninstall:
     def setup_and_teardown(self):
         # Setup
         # Preinstall llamacpp engine
-        run("Install Engine", ["engines", "install", "cortex.llamacpp"],timeout = None)
+        run("Install Engine", ["engines", "install", "llama-cpp"],timeout = None)

         yield

         # Teardown
         # Clean up, removing installed engine
-        run("Uninstall Engine", ["engines", "uninstall", "cortex.llamacpp"])
+        run("Uninstall Engine", ["engines", "uninstall", "llama-cpp"])

     def test_engines_uninstall_llamacpp_should_be_successfully(self):
         exit_code, output, error = run(
-            "Uninstall engine", ["engines", "uninstall", "cortex.llamacpp"]
+            "Uninstall engine", ["engines", "uninstall", "llama-cpp"]
         )
-        assert "Engine cortex.llamacpp uninstalled successfully!" in output
+        assert "Engine llama-cpp uninstalled successfully!" in output
         assert exit_code == 0, f"Install engine failed with error: {error}"
diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc
index 913be52f6..0e6da63c6 100644
--- a/engine/services/engine_service.cc
+++ b/engine/services/engine_service.cc
@@ -3,6 +3,7 @@
 #include
 #include "algorithm"
 #include "utils/archive_utils.h"
+#include "utils/engine_constants.h"
 #include "utils/engine_matcher_utils.h"
 #include "utils/file_manager_utils.h"
 #include "utils/json.hpp"
@@ -17,7 +18,7 @@ namespace {
 std::string GetSuitableCudaVersion(const std::string& engine,
                                    const std::string& cuda_driver_version) {
   auto suitable_toolkit_version = "";
-  if (engine == "cortex.tensorrt-llm") {
+  if (engine == kTrtLlmRepo || engine == kTrtLlmEngine) {
     // for tensorrt-llm, we need to download cuda toolkit v12.4
     suitable_toolkit_version = "12.4";
   } else {
@@ -32,6 +33,18 @@ std::string GetSuitableCudaVersion(const std::string& engine,
   }
   return suitable_toolkit_version;
 }
+
+// Need to change this after we rename repositories
+std::string NormalizeEngine(const std::string& engine) {
+  if (engine == kLlamaEngine) {
+    return kLlamaRepo;
+  } else if (engine == kOnnxEngine) {
+    return kOnnxRepo;
+  } else if (engine == kTrtLlmEngine) {
+    return kTrtLlmRepo;
+  }
+  return engine;
+};
 }  // namespace

 EngineService::EngineService()
@@ -59,38 +72,36 @@ std::vector<EngineInfo> EngineService::GetEngineInfoList() const {
   std::string onnx_status{kIncompatible};
   std::string llamacpp_status =
-      std::filesystem::exists(ecp / "cortex.llamacpp") ? kReady : kNotInstalled;
+      std::filesystem::exists(ecp / kLlamaRepo) ? kReady : kNotInstalled;
   std::string tensorrt_status{kIncompatible};
 #ifdef _WIN32
   onnx_status =
-      std::filesystem::exists(ecp / "cortex.onnx") ? kReady : kNotInstalled;
-  tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
-                        ? kReady
-                        : kNotInstalled;
+      std::filesystem::exists(ecp / kOnnxRepo) ? kReady : kNotInstalled;
+  tensorrt_status =
+      std::filesystem::exists(ecp / kTrtLlmRepo) ? kReady : kNotInstalled;
 #elif defined(__linux__)
-  tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
-                        ? kReady
-                        : kNotInstalled;
+  tensorrt_status =
+      std::filesystem::exists(ecp / kTrtLlmRepo) ? kReady : kNotInstalled;
 #endif

   std::vector<EngineInfo> engines = {
-      {.name = "cortex.onnx",
+      {.name = kOnnxEngine,
        .description = "This extension enables chat completion API calls using "
                       "the Onnx engine",
        .format = "ONNX",
-       .product_name = "ONNXRuntime",
+       .product_name = kOnnxEngine,
        .status = onnx_status},
-      {.name = "cortex.llamacpp",
+      {.name = kLlamaEngine,
        .description = "This extension enables chat completion API calls using "
                       "the LlamaCPP engine",
        .format = "GGUF",
-       .product_name = "llama.cpp",
+       .product_name = kLlamaEngine,
        .status = llamacpp_status},
-      {.name = "cortex.tensorrt-llm",
+      {.name = kTrtLlmEngine,
        .description = "This extension enables chat completion API calls using "
                       "the TensorrtLLM engine",
        .format = "TensorRT Engines",
-       .product_name = "TensorRT-LLM",
+       .product_name = kTrtLlmEngine,
        .status = tensorrt_status},
   };

@@ -98,7 +109,7 @@ std::vector<EngineInfo> EngineService::GetEngineInfoList() const {
     if (engine.status == kReady) {
       // try to read the version.txt
       auto engine_info_path = file_manager_utils::GetEnginesContainerPath() /
-                              engine.name / "version.txt";
+                              NormalizeEngine(engine.name) / "version.txt";
       if (!std::filesystem::exists(engine_info_path)) {
         continue;
       }
@@ -119,15 +130,15 @@ std::vector<EngineInfo> EngineService::GetEngineInfoList() const {
 cpp::result EngineService::InstallEngine(
     const std::string& engine, const std::string& version,
     const std::string& src) {
-
+  auto ne = NormalizeEngine(engine);
   if (!src.empty()) {
-    return UnzipEngine(engine, version, src);
+    return UnzipEngine(ne, version, src);
   } else {
-    auto result = DownloadEngine(engine, version);
+    auto result = DownloadEngine(ne, version);
     if (result.has_error()) {
       return result;
     }
-    return DownloadCuda(engine);
+    return DownloadCuda(ne);
   }
 }
@@ -198,20 +209,21 @@ cpp::result EngineService::UnzipEngine(
 cpp::result EngineService::UninstallEngine(
     const std::string& engine) {
+  auto ne = NormalizeEngine(engine);
   auto ecp = file_manager_utils::GetEnginesContainerPath();
-  auto engine_path = ecp / engine;
+  auto engine_path = ecp / ne;

   if (!std::filesystem::exists(engine_path)) {
-    return cpp::fail("Engine " + engine + " is not installed!");
+    return cpp::fail("Engine " + ne + " is not installed!");
   }

   try {
     std::filesystem::remove_all(engine_path);
-    CTL_INF("Engine " << engine << " uninstalled successfully!");
+    CTL_INF("Engine " << ne << " uninstalled successfully!");
     return true;
   } catch (const std::exception& e) {
-    CTL_ERR("Failed to uninstall engine " << engine << ": " << e.what());
-    return cpp::fail("Failed to uninstall engine " + engine + ": " + e.what());
+    CTL_ERR("Failed to uninstall engine " << ne << ": " << e.what());
+    return cpp::fail("Failed to uninstall engine " + ne + ": " + e.what());
   }
 }
@@ -345,7 +357,8 @@ cpp::result EngineService::DownloadEngine(
 cpp::result EngineService::DownloadCuda(
     const std::string& engine) {
-  if (hw_inf_.sys_inf->os == "mac" || engine == "cortex.onnx") {
+  if (hw_inf_.sys_inf->os == "mac" || engine == kOnnxRepo ||
+      engine == kOnnxEngine) {
     // mac and onnx engine does not require cuda toolkit
     return true;
   }
@@ -414,13 +427,13 @@ cpp::result EngineService::DownloadCuda(
 std::string EngineService::GetMatchedVariant(
     const std::string& engine, const std::vector& variants) {
   std::string matched_variant;
-  if (engine == "cortex.tensorrt-llm") {
+  if (engine == kTrtLlmRepo || engine == kTrtLlmEngine) {
     matched_variant = engine_matcher_utils::ValidateTensorrtLlm(
         variants, hw_inf_.sys_inf->os, hw_inf_.cuda_driver_version);
-  } else if (engine == "cortex.onnx") {
+  } else if (engine == kOnnxRepo || engine == kOnnxEngine) {
     matched_variant = engine_matcher_utils::ValidateOnnx(
         variants, hw_inf_.sys_inf->os, hw_inf_.sys_inf->arch);
-  } else if (engine == "cortex.llamacpp") {
+  } else if (engine == kLlamaRepo || engine == kLlamaEngine) {
     auto suitable_avx =
         engine_matcher_utils::GetSuitableAvxVariant(hw_inf_.cpu_inf);
     matched_variant = engine_matcher_utils::Validate(
diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h
index 102c3b121..384ccf095 100644
--- a/engine/services/engine_service.h
+++ b/engine/services/engine_service.h
@@ -6,8 +6,8 @@
 #include
 #include
 #include "utils/cpuid/cpu_info.h"
+#include "utils/engine_constants.h"
 #include "utils/result.hpp"
-
 struct EngineInfo {
   std::string name;
   std::string description;
@@ -28,7 +28,7 @@ class EngineService {
   constexpr static auto kNotInstalled = "Not Installed";

   const std::vector<std::string> kSupportEngines = {
-      "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"};
+      kLlamaEngine, kOnnxEngine, kTrtLlmEngine};

   EngineService();
   ~EngineService();
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index c4b6ae2ab..61ce7bbc3 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -8,6 +8,7 @@
 #include "database/models.h"
 #include "httplib.h"
 #include "utils/cli_selection_utils.h"
+#include "utils/engine_constants.h"
 #include "utils/file_manager_utils.h"
 #include "utils/huggingface_utils.h"
 #include "utils/logging_utils.h"
@@ -384,7 +385,7 @@ cpp::result ModelService::DeleteModel(
   // Remove model files if they are not imported locally
   if (model_entry.value().branch_name != "imported") {
     if (mc.files.size() > 0) {
-      if (mc.engine == "cortex.llamacpp") {
+      if (mc.engine == kLlamaRepo || mc.engine == kLlamaEngine) {
         for (auto& file : mc.files) {
           std::filesystem::path gguf_p(
               fmu::ToAbsoluteCortexDataPath(fs::path(file)));
diff --git a/engine/templates/linux/postinst b/engine/templates/linux/postinst
index 56e6cbcba..dc954487e 100644
--- a/engine/templates/linux/postinst
+++ b/engine/templates/linux/postinst
@@ -2,4 +2,4 @@
 DESTINATION_BINARY_NAME=cortex
 USER_TO_RUN_AS=${SUDO_USER:-$(whoami)}
 echo "Download cortex.llamacpp engines by default for user $USER_TO_RUN_AS"
-sudo -u $USER_TO_RUN_AS env PATH=$PATH:/usr/lib/wsl/lib /usr/bin/$DESTINATION_BINARY_NAME engines install cortex.llamacpp
+sudo -u $USER_TO_RUN_AS env PATH=$PATH:/usr/lib/wsl/lib /usr/bin/$DESTINATION_BINARY_NAME engines install llama-cpp
diff --git a/engine/templates/linux/postinst_local b/engine/templates/linux/postinst_local
index 5453552cf..44908a33f 100644
--- a/engine/templates/linux/postinst_local
+++ b/engine/templates/linux/postinst_local
@@ -2,5 +2,5 @@
 DESTINATION_BINARY_NAME=cortex
 USER_TO_RUN_AS=${SUDO_USER:-$(whoami)}
 echo "Download cortex.llamacpp engines by default for user $USER_TO_RUN_AS"
-sudo -u $USER_TO_RUN_AS env PATH=$PATH:/usr/lib/wsl/lib /usr/bin/$DESTINATION_BINARY_NAME engines install cortex.llamacpp -s /tmp/$DESTINATION_BINARY_NAME-dependencies
+sudo -u $USER_TO_RUN_AS env PATH=$PATH:/usr/lib/wsl/lib /usr/bin/$DESTINATION_BINARY_NAME engines install llama-cpp -s /tmp/$DESTINATION_BINARY_NAME-dependencies
 rm -rf /tmp/$DESTINATION_BINARY_NAME-dependencies
diff --git a/engine/templates/macos/postinstall b/engine/templates/macos/postinstall
index 551f98dcf..c13ed1b74 100644
--- a/engine/templates/macos/postinstall
+++ b/engine/templates/macos/postinstall
@@ -7,7 +7,7 @@ CONFIGURATION_FILE_NAME=.cortexrc
 USER_TO_RUN_AS=$(stat -f "%Su" /dev/console)
echo "Download cortex.llamacpp engines by default for user $USER_TO_RUN_AS" -sudo -u $USER_TO_RUN_AS /usr/local/bin/$DESTINATION_BINARY_NAME engines install cortex.llamacpp +sudo -u $USER_TO_RUN_AS /usr/local/bin/$DESTINATION_BINARY_NAME engines install llama-cpp sudo chown -R $USER_TO_RUN_AS:staff "/Users/$USER_TO_RUN_AS/$DATA_FOLDER_NAME" sudo chown $USER_TO_RUN_AS:staff "/Users/$USER_TO_RUN_AS/$CONFIGURATION_FILE_NAME" diff --git a/engine/templates/macos/postinstall_local b/engine/templates/macos/postinstall_local index f2ea8e454..47d851de3 100644 --- a/engine/templates/macos/postinstall_local +++ b/engine/templates/macos/postinstall_local @@ -7,7 +7,7 @@ CONFIGURATION_FILE_NAME=.cortexrc USER_TO_RUN_AS=$(stat -f "%Su" /dev/console) echo "Download cortex.llamacpp engines by default for user $USER_TO_RUN_AS" -sudo -u $USER_TO_RUN_AS /usr/local/bin/$DESTINATION_BINARY_NAME engines install cortex.llamacpp -s ./dependencies +sudo -u $USER_TO_RUN_AS /usr/local/bin/$DESTINATION_BINARY_NAME engines install llama-cpp -s ./dependencies sudo chown -R $USER_TO_RUN_AS:staff "/Users/$USER_TO_RUN_AS/$DATA_FOLDER_NAME" sudo chown $USER_TO_RUN_AS:staff "/Users/$USER_TO_RUN_AS/$CONFIGURATION_FILE_NAME" diff --git a/engine/templates/windows/installer-beta.iss b/engine/templates/windows/installer-beta.iss index 71119a182..be341c57b 100644 --- a/engine/templates/windows/installer-beta.iss +++ b/engine/templates/windows/installer-beta.iss @@ -60,7 +60,7 @@ begin WizardForm.ProgressGauge.Update; // Download llamacpp engine by default - CortexInstallCmd := Format('"%s\cortex-beta.exe" engines install cortex.llamacpp', [ExpandedAppDir]); + CortexInstallCmd := Format('"%s\cortex-beta.exe" engines install llama-cpp', [ExpandedAppDir]); Exec('cmd.exe', '/C ' + CortexInstallCmd, '', SW_HIDE, ewWaitUntilTerminated, ResultCode); // Set the progress bar to 90% after downloading the engine diff --git a/engine/templates/windows/installer-nightly.iss b/engine/templates/windows/installer-nightly.iss index 80c5b6358..1f85e5f46 100644 --- a/engine/templates/windows/installer-nightly.iss +++ b/engine/templates/windows/installer-nightly.iss @@ -60,7 +60,7 @@ begin WizardForm.ProgressGauge.Update; // Download llamacpp engine by default - CortexInstallCmd := Format('"%s\cortex-nightly.exe" engines install cortex.llamacpp', [ExpandedAppDir]); + CortexInstallCmd := Format('"%s\cortex-nightly.exe" engines install llama-cpp', [ExpandedAppDir]); Exec('cmd.exe', '/C ' + CortexInstallCmd, '', SW_HIDE, ewWaitUntilTerminated, ResultCode); // Set the progress bar to 90% after downloading the engine diff --git a/engine/templates/windows/installer.iss b/engine/templates/windows/installer.iss index f10125ab8..8b113b8f3 100644 --- a/engine/templates/windows/installer.iss +++ b/engine/templates/windows/installer.iss @@ -60,7 +60,7 @@ begin WizardForm.ProgressGauge.Update; // Download llamacpp engine by default - CortexInstallCmd := Format('"%s\cortex.exe" engines install cortex.llamacpp', [ExpandedAppDir]); + CortexInstallCmd := Format('"%s\cortex.exe" engines install llama-cpp', [ExpandedAppDir]); Exec('cmd.exe', '/C ' + CortexInstallCmd, '', SW_HIDE, ewWaitUntilTerminated, ResultCode); // Set the progress bar to 90% after downloading the engine diff --git a/engine/templates/windows/local-installer-beta.iss b/engine/templates/windows/local-installer-beta.iss index 39f66d486..ef751e047 100644 --- a/engine/templates/windows/local-installer-beta.iss +++ b/engine/templates/windows/local-installer-beta.iss @@ -52,7 +52,7 @@ begin 
WizardForm.StatusLabel.Update; // Run the installation command with the -s option pointing to TempFolder - CortexInstallCmd := Format('"%s\cortex-beta.exe" engines install cortex.llamacpp -s "%s"', [ExpandedAppDir, TempFolder]); + CortexInstallCmd := Format('"%s\cortex-beta.exe" engines install llama-cpp -s "%s"', [ExpandedAppDir, TempFolder]); Exec('powershell.exe', '-Command ' + CortexInstallCmd, '', SW_HIDE, ewWaitUntilTerminated, ResultCode); // Clear the status message after completion diff --git a/engine/templates/windows/local-installer-nightly.iss b/engine/templates/windows/local-installer-nightly.iss index 3cf9a56f1..d8255e68b 100644 --- a/engine/templates/windows/local-installer-nightly.iss +++ b/engine/templates/windows/local-installer-nightly.iss @@ -52,7 +52,7 @@ begin WizardForm.StatusLabel.Update; // Run the installation command with the -s option pointing to TempFolder - CortexInstallCmd := Format('"%s\cortex-nightly.exe" engines install cortex.llamacpp -s "%s"', [ExpandedAppDir, TempFolder]); + CortexInstallCmd := Format('"%s\cortex-nightly.exe" engines install llama-cpp -s "%s"', [ExpandedAppDir, TempFolder]); Exec('powershell.exe', '-Command ' + CortexInstallCmd, '', SW_HIDE, ewWaitUntilTerminated, ResultCode); // Clear the status message after completion diff --git a/engine/templates/windows/local-installer.iss b/engine/templates/windows/local-installer.iss index 86a9cf280..77fb4ae17 100644 --- a/engine/templates/windows/local-installer.iss +++ b/engine/templates/windows/local-installer.iss @@ -52,7 +52,7 @@ begin WizardForm.StatusLabel.Update; // Run the installation command with the -s option pointing to TempFolder - CortexInstallCmd := Format('"%s\cortex.exe" engines install cortex.llamacpp -s "%s"', [ExpandedAppDir, TempFolder]); + CortexInstallCmd := Format('"%s\cortex.exe" engines install llama-cpp -s "%s"', [ExpandedAppDir, TempFolder]); Exec('powershell.exe', '-Command ' + CortexInstallCmd, '', SW_HIDE, ewWaitUntilTerminated, ResultCode); // Clear the status message after completion diff --git a/engine/utils/engine_constants.h b/engine/utils/engine_constants.h new file mode 100644 index 000000000..63334b860 --- /dev/null +++ b/engine/utils/engine_constants.h @@ -0,0 +1,10 @@ +#pragma once + +constexpr const auto kOnnxEngine = "onnxruntime"; +constexpr const auto kLlamaEngine = "llama-cpp"; +constexpr const auto kTrtLlmEngine = "tensorrt-llm"; + +constexpr const auto kOnnxRepo = "cortex.onnx"; +constexpr const auto kLlamaRepo = "cortex.llamacpp"; +constexpr const auto kTrtLlmRepo = "cortex.tensorrt-llm"; +constexpr const auto kPythonRuntimeRepo = "cortex.python"; \ No newline at end of file
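The pattern that repeats across all of these files is a two-way mapping between the new user-facing engine names (llama-cpp, onnxruntime, tensorrt-llm) and the legacy on-disk repository folder names (cortex.llamacpp, cortex.onnx, cortex.tensorrt-llm), which stay in place until the engine repositories themselves are renamed. The standalone sketch below mirrors the constants from engine/utils/engine_constants.h together with the NormalizeEngine helper added in server.cc and engine_service.cc and the Repo2Engine helper added in run_cmd.cc; it is illustrative only, and the file name and main() driver are assumptions, not part of the patch.

// name_mapping_sketch.cc (hypothetical file name) -- illustrative sketch of the
// engine-name mapping introduced by this patch.
// Build: g++ -std=c++17 name_mapping_sketch.cc
#include <iostream>
#include <string>

// Mirrors engine/utils/engine_constants.h
constexpr const auto kOnnxEngine = "onnxruntime";
constexpr const auto kLlamaEngine = "llama-cpp";
constexpr const auto kTrtLlmEngine = "tensorrt-llm";

constexpr const auto kOnnxRepo = "cortex.onnx";
constexpr const auto kLlamaRepo = "cortex.llamacpp";
constexpr const auto kTrtLlmRepo = "cortex.tensorrt-llm";

// User-facing engine name -> legacy repo/folder name, as in the NormalizeEngine
// helpers in server.cc and engine_service.cc. Unknown names pass through unchanged.
std::string NormalizeEngine(const std::string& engine) {
  if (engine == kLlamaEngine) return kLlamaRepo;
  if (engine == kOnnxEngine) return kOnnxRepo;
  if (engine == kTrtLlmEngine) return kTrtLlmRepo;
  return engine;
}

// Legacy repo/folder name -> user-facing engine name, as in run_cmd.cc.
std::string Repo2Engine(const std::string& repo) {
  if (repo == kLlamaRepo) return kLlamaEngine;
  if (repo == kOnnxRepo) return kOnnxEngine;
  if (repo == kTrtLlmRepo) return kTrtLlmEngine;
  return repo;
}

int main() {
  // Callers may still send either form; both resolve to the same on-disk
  // folder, and that folder maps back to the new display name.
  for (const std::string name : {"llama-cpp", "cortex.llamacpp", "onnxruntime"}) {
    std::cout << name << " -> " << NormalizeEngine(name) << " -> "
              << Repo2Engine(NormalizeEngine(name)) << "\n";
  }
  return 0;
}

Once the repositories are renamed (see the "Need to change this after we rename repositories" comments in the diff), NormalizeEngine can collapse to the identity mapping and only the user-facing names remain.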