diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json index 1ac69d78e..da31ab64b 100644 --- a/docs/static/openapi/cortex.json +++ b/docs/static/openapi/cortex.json @@ -1234,9 +1234,9 @@ }, "/v1/engines/{name}": { "get": { - "operationId": "EnginesController_findOne", - "summary": "Get an engine", - "description": "Retrieves an engine instance, providing basic information about the engine.", + "operationId": "EnginesController_listInstalledEngines", + "summary": "List installed engines", + "description": "List installed engines for a particular engine type.", "parameters": [ { "name": "name", @@ -1292,10 +1292,12 @@ } }, "tags": ["Engines"] - }, - "post": { - "summary": "Install an engine", - "description": "Install an engine of a specific type, with optional version and variant", + } + }, + "/v1/engines/{name}/releases": { + "get": { + "summary": "List released engines", + "description": "List released engines of a specific engine type.", "parameters": [ { "name": "name", @@ -1307,32 +1309,156 @@ "default": "llama-cpp" }, "description": "The type of engine" - }, + } + ], + "responses": { + "200": { + "description": "Successful installation", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "draft": { + "type": "boolean", + "example": false + }, + "name": { + "type": "string", + "example": "v0.1.39-20.11.24" + }, + "prerelease": { + "type": "boolean", + "example": true + }, + "published_at": { + "type": "string", + "format": "date-time", + "example": "2024-11-20T17:39:40Z" + }, + "url": { + "type": "string", + "example": "https://api.github.com/repos/janhq/cortex.llamacpp/releases/186479804" + } + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/v1/engines/{name}/releases/latest": { + "get": { + "summary": "Get latest release", + "description": "Return variants for the latest engine release of a specific engine type.", + "parameters": [ { - "name": "version", - "in": "query", - "required": false, + "name": "name", + "in": "path", + "required": true, "schema": { - "type": "string" + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" }, - "description": "The version of the engine to install (optional)" - }, + "description": "The type of engine" + } + ], + "responses": { + "200": { + "description": "Successful installation", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "created_at": { + "type": "string", + "format": "date-time", + "example": "2024-11-15T10:39:39Z" + }, + "download_count": { + "type": "integer", + "example": 76 + }, + "name": { + "type": "string", + "example": "0.1.39-linux-amd64-avx-cuda-11-7" + }, + "size": { + "type": "integer", + "example": 151215080 + } + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/v1/engines/{name}/install": { + "post": { + "summary": "Install an engine", + "description": "Install an engine of a specific type, with optional version and variant. If none are provided, the latest version and most suitable variant will be installed.", + "parameters": [ { - "name": "variant", - "in": "query", - "required": false, + "name": "name", + "in": "path", + "required": true, "schema": { - "type": "string" + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" }, - "description": "The variant of the engine to install (optional)" + "description": "The type of engine" } ], + "requestBody": { + "required": false, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "The version of the engine to install (optional)", + "example": "v0.1.39" + }, + "variant": { + "type": "string", + "description": "The variant of the engine to install (optional)", + "example": "mac-arm64" + } + } + } + } + } + }, "responses": { "200": { "description": "Successful installation", "content": { "application/json": { - "schema": {} + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine starts installing!" + } + } + } } } } @@ -1341,7 +1467,7 @@ }, "delete": { "summary": "Uninstall an engine", - "description": "Uninstall an engine based on type, version, and variant", + "description": "Uninstall an engine based on engine, version, and variant. If version and variant are not provided, all versions and variants of the engine will be uninstalled.", "parameters": [ { "name": "name", @@ -1353,26 +1479,30 @@ "default": "llama-cpp" }, "description": "The type of engine" - }, - { - "name": "version", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "The version of the engine to uninstall (optional)" - }, - { - "name": "variant", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "The variant of the engine to uninstall (optional)" } ], + "requestBody": { + "required": false, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "The version of the engine to uninstall (optional)", + "example": "v0.1.39" + }, + "variant": { + "type": "string", + "description": "The variant of the engine to uninstall (optional)", + "example": "mac-arm64" + } + } + } + } + } + }, "responses": { "200": { "description": "Successful uninstallation", @@ -1381,28 +1511,10 @@ "schema": { "type": "object", "properties": { - "success": { - "type": "boolean", - "description": "Indicates if the uninstallation was successful" - }, "message": { "type": "string", - "description": "Description of the uninstallation action taken" - }, - "uninstalledEngines": { - "type": "array", - "items": { - "type": "object", - "properties": { - "version": { - "type": "string" - }, - "variant": { - "type": "string" - } - } - }, - "description": "List of uninstalled engine versions and variants" + "description": "Engine llama-cpp uninstalled successfully!", + "example": "Engine llama-cpp uninstalled successfully!" } } } @@ -1429,6 +1541,44 @@ "tags": ["Engines"] } }, + "/v1/engines/{name}/update": { + "post": { + "summary": "Update engine", + "description": "Updates the specified engine type using the engine variant currently set as default.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The name of the engine to update" + } + ], + "responses": { + "200": { + "description": "Engine updated successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine updated successfully" + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, "/v1/engines/{name}/default": { "get": { "summary": "Get default engine variant", @@ -1603,44 +1753,6 @@ "tags": ["Engines"] } }, - "/v1/engines/{name}/update": { - "post": { - "summary": "Update engine", - "description": "Updates the specified engine type using the engine variant currently set as default.", - "parameters": [ - { - "name": "name", - "in": "path", - "required": true, - "schema": { - "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], - "default": "llama-cpp" - }, - "description": "The name of the engine to update" - } - ], - "responses": { - "200": { - "description": "Engine updated successfully", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "message": { - "type": "string", - "example": "Engine updated successfully" - } - } - } - } - } - } - }, - "tags": ["Engines"] - } - }, "/v1/hardware": { "get": { "summary": "Get hardware information", diff --git a/engine/cli/commands/engine_get_cmd.cc b/engine/cli/commands/engine_get_cmd.cc index 8699c336b..3fd1fd576 100644 --- a/engine/cli/commands/engine_get_cmd.cc +++ b/engine/cli/commands/engine_get_cmd.cc @@ -2,8 +2,8 @@ #include #include #include +#include "common/engine_servicei.h" #include "server_start_cmd.h" -#include "services/engine_service.h" #include "utils/curl_utils.h" #include "utils/logging_utils.h" #include "utils/url_parser.h" diff --git a/engine/cli/commands/engine_install_cmd.cc b/engine/cli/commands/engine_install_cmd.cc index f37de2e77..477e38ee2 100644 --- a/engine/cli/commands/engine_install_cmd.cc +++ b/engine/cli/commands/engine_install_cmd.cc @@ -45,28 +45,28 @@ bool EngineInstallCmd::Exec(const std::string& engine, } }); - auto versions_url = url_parser::Url{ + auto releases_url = url_parser::Url{ .protocol = "http", .host = host_ + ":" + std::to_string(port_), - .pathParams = {"v1", "engines", engine, "versions"}, + .pathParams = {"v1", "engines", engine, "releases"}, }; - auto versions_result = curl_utils::SimpleGetJson(versions_url.ToFullPath()); - if (versions_result.has_error()) { - CTL_ERR(versions_result.error()); + auto releases_result = curl_utils::SimpleGetJson(releases_url.ToFullPath()); + if (releases_result.has_error()) { + CTL_ERR(releases_result.error()); return false; } std::vector version_selections; - for (const auto& release_version : versions_result.value()) { + for (const auto& release_version : releases_result.value()) { version_selections.push_back(release_version["name"].asString()); } - auto selected_version = + auto selected_release = cli_selection_utils::PrintSelection(version_selections); - if (selected_version == std::nullopt) { + if (selected_release == std::nullopt) { CTL_ERR("Invalid version selection"); return false; } - std::cout << "Selected version: " << selected_version.value() << std::endl; + std::cout << "Selected version: " << selected_release.value() << std::endl; auto variant_url = url_parser::Url{ .protocol = "http", @@ -76,8 +76,8 @@ bool EngineInstallCmd::Exec(const std::string& engine, "v1", "engines", engine, - "versions", - selected_version.value(), + "releases", + selected_release.value(), }, }; auto variant_result = curl_utils::SimpleGetJson(variant_url.ToFullPath()); @@ -113,23 +113,25 @@ bool EngineInstallCmd::Exec(const std::string& engine, return false; } std::cout << "Selected " << selected_variant.value() << " - " - << selected_version.value() << std::endl; - - auto install_url = - url_parser::Url{.protocol = "http", - .host = host_ + ":" + std::to_string(port_), - .pathParams = - { - "v1", - "engines", - engine, - }, - .queries = { - {"version", selected_version.value()}, - {"variant", selected_variant.value()}, - }}; - - auto response = curl_utils::SimplePostJson(install_url.ToFullPath()); + << selected_release.value() << std::endl; + + auto install_url = url_parser::Url{ + .protocol = "http", + .host = host_ + ":" + std::to_string(port_), + .pathParams = + { + "v1", + "engines", + engine, + "install", + }, + }; + Json::Value body; + body["version"] = selected_release.value(); + body["variant"] = selected_variant.value(); + + auto response = curl_utils::SimplePostJson(install_url.ToFullPath(), + body.toStyledString()); if (response.has_error()) { CTL_ERR(response.error()); return false; @@ -163,14 +165,17 @@ bool EngineInstallCmd::Exec(const std::string& engine, "v1", "engines", engine, + "install", }, }; + Json::Value body; if (!version.empty()) { - install_url.queries = {{"version", version}}; + body["version"] = version; } - auto response = curl_utils::SimplePostJson(install_url.ToFullPath()); + auto response = curl_utils::SimplePostJson(install_url.ToFullPath(), + body.toStyledString()); if (response.has_error()) { // TODO: namh refactor later Json::Value root; diff --git a/engine/cli/commands/engine_uninstall_cmd.cc b/engine/cli/commands/engine_uninstall_cmd.cc index 1ef5580a8..ef9c95af8 100644 --- a/engine/cli/commands/engine_uninstall_cmd.cc +++ b/engine/cli/commands/engine_uninstall_cmd.cc @@ -17,9 +17,10 @@ void EngineUninstallCmd::Exec(const std::string& host, int port, } } - auto url = url_parser::Url{.protocol = "http", - .host = host + ":" + std::to_string(port), - .pathParams = {"v1", "engines", engine}}; + auto url = + url_parser::Url{.protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "engines", engine, "install"}}; auto result = curl_utils::SimpleDeleteJson(url.ToFullPath()); if (result.has_error()) { diff --git a/engine/cli/commands/engine_use_cmd.cc b/engine/cli/commands/engine_use_cmd.cc index d03f9ddc0..c11ca70d2 100644 --- a/engine/cli/commands/engine_use_cmd.cc +++ b/engine/cli/commands/engine_use_cmd.cc @@ -62,12 +62,11 @@ cpp::result EngineUseCmd::Exec(const std::string& host, return cpp::fail("Invalid version selection"); } - auto set_default_engine_variant = - url_parser::Url{.protocol = "http", - .host = host + ":" + std::to_string(port), - .pathParams = {"v1", "engines", engine, "default"}, - .queries = {{"version", selected_version.value()}, - {"variant", selected_variant.value()}}}; + auto set_default_engine_variant = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "engines", engine, "default"}, + }; auto response = curl_utils::SimplePostJson(set_default_engine_variant.ToFullPath()); diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index a75bd1f9b..9e110bd66 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -47,11 +47,24 @@ void Engines::ListEngine( void Engines::UninstallEngine( const HttpRequestPtr& req, std::function&& callback, - const std::string& engine, const std::optional version, - const std::optional variant) { + const std::string& engine) { + std::optional norm_variant = std::nullopt; + std::optional norm_version = std::nullopt; + if (req->getJsonObject() != nullptr) { + auto variant = (*(req->getJsonObject())).get("variant", "").asString(); + auto version = + (*(req->getJsonObject())).get("version", "latest").asString(); - auto result = - engine_service_->UninstallEngineVariant(engine, version, variant); + if (!variant.empty()) { + norm_variant = variant; + } + if (!version.empty()) { + norm_version = version; + } + } + + auto result = engine_service_->UninstallEngineVariant(engine, norm_version, + norm_variant); Json::Value ret; if (result.has_error()) { @@ -69,7 +82,7 @@ void Engines::UninstallEngine( } } -void Engines::GetEngineVersions( +void Engines::GetEngineReleases( const HttpRequestPtr& req, std::function&& callback, const std::string& engine) const { @@ -134,12 +147,23 @@ void Engines::GetEngineVariants( void Engines::InstallEngine( const HttpRequestPtr& req, std::function&& callback, - const std::string& engine, const std::optional version, - const std::optional variant_name) { - auto normalized_version = version.value_or("latest"); + const std::string& engine) { + std::optional norm_variant = std::nullopt; + std::string norm_version{"latest"}; + + if (req->getJsonObject() != nullptr) { + auto variant = (*(req->getJsonObject())).get("variant", "").asString(); + auto version = + (*(req->getJsonObject())).get("version", "latest").asString(); - auto result = engine_service_->InstallEngineAsyncV2( - engine, normalized_version, variant_name); + if (!variant.empty()) { + norm_variant = variant; + } + norm_version = version; + } + + auto result = + engine_service_->InstallEngineAsync(engine, norm_version, norm_variant); if (result.has_error()) { Json::Value res; res["message"] = result.error(); @@ -218,8 +242,36 @@ void Engines::GetLatestEngineVersion( void Engines::SetDefaultEngineVariant( const HttpRequestPtr& req, std::function&& callback, - const std::string& engine, const std::string& version, - const std::string& variant) { + const std::string& engine) { + auto json_obj = req->getJsonObject(); + if (json_obj == nullptr) { + Json::Value res; + res["message"] = "Request body is required"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + + auto variant = (*(req->getJsonObject())).get("variant", "").asString(); + if (variant.empty()) { + Json::Value ret; + ret["message"] = "Variant is required"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + auto version = (*(req->getJsonObject())).get("version", "").asString(); + if (version.empty()) { + Json::Value ret; + ret["message"] = "Version is required"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + auto result = engine_service_->SetDefaultEngineVariant(engine, version, variant); if (result.has_error()) { diff --git a/engine/controllers/engines.h b/engine/controllers/engines.h index dc9ef5a54..b0a92b6c3 100644 --- a/engine/controllers/engines.h +++ b/engine/controllers/engines.h @@ -12,57 +12,72 @@ class Engines : public drogon::HttpController { public: METHOD_LIST_BEGIN - METHOD_ADD(Engines::GetInstalledEngineVariants, "/{1}", Get); - METHOD_ADD(Engines::InstallEngine, "/{1}?version={2}&variant={3}", Options, - Post); - METHOD_ADD(Engines::UninstallEngine, "/{1}?version={2}&variant={3}", Options, - Delete); - METHOD_ADD(Engines::SetDefaultEngineVariant, - "/{1}/default?version={2}&variant={3}", Options, Post); - METHOD_ADD(Engines::GetDefaultEngineVariant, "/{1}/default", Get); + // install engine + METHOD_ADD(Engines::InstallEngine, "/{1}/install", Options, Post); + ADD_METHOD_TO(Engines::InstallEngine, "/v1/engines/{1}/install", Options, + Post); - METHOD_ADD(Engines::LoadEngine, "/{1}/load", Options, Post); - METHOD_ADD(Engines::UnloadEngine, "/{1}/load", Options, Delete); - METHOD_ADD(Engines::UpdateEngine, "/{1}/update", Options, Post); - METHOD_ADD(Engines::ListEngine, "", Get); + // uninstall engine + METHOD_ADD(Engines::UninstallEngine, "/{1}/install", Options, Delete); + ADD_METHOD_TO(Engines::UninstallEngine, "/v1/engines/{1}/install", Options, + Delete); - METHOD_ADD(Engines::GetEngineVersions, "/{1}/versions", Get); - METHOD_ADD(Engines::GetEngineVariants, "/{1}/versions/{2}", Get); - METHOD_ADD(Engines::GetLatestEngineVersion, "/{1}/latest", Get); + // set default engine + METHOD_ADD(Engines::SetDefaultEngineVariant, "/{1}/default", Options, Post); + ADD_METHOD_TO(Engines::SetDefaultEngineVariant, "/v1/engines/{1}/default", + Options, Post); - ADD_METHOD_TO(Engines::GetInstalledEngineVariants, "/v1/engines/{1}", Get); - ADD_METHOD_TO(Engines::InstallEngine, - "/v1/engines/{1}?version={2}&variant={3}", Options, Post); - ADD_METHOD_TO(Engines::UninstallEngine, - "/v1/engines/{1}?version={2}&variant={3}", Options, Delete); - ADD_METHOD_TO(Engines::SetDefaultEngineVariant, - "/v1/engines/{1}/default?version={2}&variant={3}", Options, - Post); + // get default engine + METHOD_ADD(Engines::GetDefaultEngineVariant, "/{1}/default", Get); ADD_METHOD_TO(Engines::GetDefaultEngineVariant, "/v1/engines/{1}/default", Get); + // update engine + METHOD_ADD(Engines::UpdateEngine, "/{1}/update", Options, Post); + ADD_METHOD_TO(Engines::UpdateEngine, "/v1/engines/{1}/update", Options, Post); + + // load engine + METHOD_ADD(Engines::LoadEngine, "/{1}/load", Options, Post); ADD_METHOD_TO(Engines::LoadEngine, "/v1/engines/{1}/load", Options, Post); + + // unload engine + METHOD_ADD(Engines::UnloadEngine, "/{1}/load", Options, Delete); ADD_METHOD_TO(Engines::UnloadEngine, "/v1/engines/{1}/load", Options, Delete); - ADD_METHOD_TO(Engines::UpdateEngine, "/v1/engines/{1}/update", Options, Post); - ADD_METHOD_TO(Engines::GetEngineVersions, "/v1/engines/{1}/versions", Get); - ADD_METHOD_TO(Engines::GetEngineVariants, "/v1/engines/{1}/versions/{2}", - Get); + + METHOD_ADD(Engines::GetInstalledEngineVariants, "/{1}", Get); + ADD_METHOD_TO(Engines::GetInstalledEngineVariants, "/v1/engines/{1}", Get); + + METHOD_ADD(Engines::ListEngine, "", Get); ADD_METHOD_TO(Engines::ListEngine, "/v1/engines", Get); + + METHOD_ADD(Engines::GetEngineReleases, "/{1}/releases", Get); + ADD_METHOD_TO(Engines::GetEngineReleases, "/v1/engines/{1}/releases", Get); + + METHOD_ADD(Engines::GetEngineVariants, "/{1}/releases/{2}", Get); + ADD_METHOD_TO(Engines::GetEngineVariants, "/v1/engines/{1}/releases/{2}", + Get); + + METHOD_ADD(Engines::GetLatestEngineVersion, "/{1}/releases/latest", Get); + ADD_METHOD_TO(Engines::GetLatestEngineVersion, + "/v1/engines/{1}/releases/latest", Get); + METHOD_LIST_END explicit Engines(std::shared_ptr engine_service) : engine_service_{engine_service} {} - void ListEngine(const HttpRequestPtr& req, - std::function&& callback) const; + void InstallEngine(const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine); void UninstallEngine(const HttpRequestPtr& req, std::function&& callback, - const std::string& engine, - const std::optional version, - const std::optional variant); + const std::string& engine); + + void ListEngine(const HttpRequestPtr& req, + std::function&& callback) const; - void GetEngineVersions(const HttpRequestPtr& req, + void GetEngineReleases(const HttpRequestPtr& req, std::function&& callback, const std::string& engine) const; @@ -71,12 +86,6 @@ class Engines : public drogon::HttpController { const std::string& engine, const std::string& version) const; - void InstallEngine(const HttpRequestPtr& req, - std::function&& callback, - const std::string& engine, - const std::optional version, - const std::optional variant_name); - void GetInstalledEngineVariants( const HttpRequestPtr& req, std::function&& callback, @@ -94,8 +103,7 @@ class Engines : public drogon::HttpController { void SetDefaultEngineVariant( const HttpRequestPtr& req, std::function&& callback, - const std::string& engine, const std::string& version, - const std::string& variant); + const std::string& engine); void GetDefaultEngineVariant( const HttpRequestPtr& req, diff --git a/engine/e2e-test/test_api_engine_install.py b/engine/e2e-test/test_api_engine_install.py index b0fbb6c9c..aabe0138d 100644 --- a/engine/e2e-test/test_api_engine_install.py +++ b/engine/e2e-test/test_api_engine_install.py @@ -18,17 +18,19 @@ def setup_and_teardown(self): stop_server() def test_engines_install_llamacpp_should_be_successful(self): - response = requests.post("http://localhost:3928/v1/engines/llama-cpp") + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") assert response.status_code == 200 def test_engines_install_llamacpp_specific_version_and_variant(self): + data = {"version": "v0.1.35-27.10.24", "variant": "linux-amd64-avx-cuda-11-7"} response = requests.post( - "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35-27.10.24&variant=linux-amd64-avx-cuda-11-7" + "http://localhost:3928/v1/engines/llama-cpp/install", json=data ) assert response.status_code == 200 def test_engines_install_llamacpp_specific_version_and_null_variant(self): + data = {"version": "v0.1.35-27.10.24"} response = requests.post( - "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35-27.10.24" + "http://localhost:3928/v1/engines/llama-cpp/install", json=data ) assert response.status_code == 200 diff --git a/engine/e2e-test/test_api_engine_uninstall.py b/engine/e2e-test/test_api_engine_uninstall.py index 491bc2d27..06c3c241c 100644 --- a/engine/e2e-test/test_api_engine_uninstall.py +++ b/engine/e2e-test/test_api_engine_uninstall.py @@ -1,6 +1,11 @@ import pytest import requests -from test_runner import start_server, stop_server +from test_runner import ( + run, + start_server, + stop_server, + wait_for_websocket_download_success_event, +) class TestApiEngineUninstall: @@ -18,47 +23,56 @@ def setup_and_teardown(self): stop_server() def test_engines_uninstall_llamacpp_should_be_successful(self): - # install first - requests.post("http://localhost:3928/v1/engines/llama-cpp") - - response = requests.delete("http://localhost:3928/v1/engines/llama-cpp") + # install first, using cli for synchronously + run( + "Install Engine", + ["engines", "install", "llama-cpp"], + timeout=120, + capture=False, + ) + response = requests.delete("http://localhost:3928/v1/engines/llama-cpp/install") assert response.status_code == 200 def test_engines_uninstall_llamacpp_with_only_version_should_be_failed(self): # install first - install_response = requests.post( - "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35" + run( + "Install Engine", + ["engines", "install", "llama-cpp", "-v", "v0.1.35"], + timeout=None, + capture=False, ) - assert install_response.status_code == 200 + data = {"version": "v0.1.35"} response = requests.delete( - "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35" + "http://localhost:3928/v1/engines/llama-cpp/install", json=data ) assert response.status_code == 400 assert response.json()["message"] == "No variant provided" - def test_engines_uninstall_llamacpp_with_variant_should_be_successful(self): + @pytest.mark.asyncio + async def test_engines_uninstall_llamacpp_with_variant_should_be_successful(self): # install first + data = {"variant": "mac-arm64"} install_response = requests.post( - "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64" + "http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data ) + await wait_for_websocket_download_success_event(timeout=120) assert install_response.status_code == 200 - response = requests.delete( - "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64" - ) + response = requests.delete("http://127.0.0.1:3928/v1/engines/llama-cpp/install") assert response.status_code == 200 def test_engines_uninstall_llamacpp_with_specific_variant_and_version_should_be_successful( self, ): + data = {"variant": "mac-arm64", "version": "v0.1.35"} # install first install_response = requests.post( - "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64&version=v0.1.35" + "http://localhost:3928/v1/engines/llama-cpp/install", json=data ) assert install_response.status_code == 200 response = requests.delete( - "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64&version=v0.1.35" + "http://localhost:3928/v1/engines/llama-cpp/install", json=data ) assert response.status_code == 200 diff --git a/engine/e2e-test/test_cli_engine_uninstall.py b/engine/e2e-test/test_cli_engine_uninstall.py index 0ca151d48..ede5e9758 100644 --- a/engine/e2e-test/test_cli_engine_uninstall.py +++ b/engine/e2e-test/test_cli_engine_uninstall.py @@ -24,8 +24,8 @@ def setup_and_teardown(self): @pytest.mark.asyncio async def test_engines_uninstall_llamacpp_should_be_successfully(self): - requests.post("http://127.0.0.1:3928/v1/engines/llama-cpp") - await wait_for_websocket_download_success_event(timeout=None) + requests.post("http://127.0.0.1:3928/v1/engines/llama-cpp/install") + await wait_for_websocket_download_success_event(timeout=120) exit_code, output, error = run( "Uninstall engine", ["engines", "uninstall", "llama-cpp"] ) diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 8d8a4a65c..40356f163 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -69,12 +69,12 @@ std::string GetEnginePath(std::string_view e) { }; } // namespace -cpp::result EngineService::InstallEngineAsyncV2( +cpp::result EngineService::InstallEngineAsync( const std::string& engine, const std::string& version, const std::optional variant_name) { auto ne = NormalizeEngine(engine); - CTL_INF("InstallEngineAsyncV2: " << ne << ", " << version << ", " - << variant_name.value_or("")); + CTL_INF("InstallEngineAsync: " << ne << ", " << version << ", " + << variant_name.value_or("")); auto os = hw_inf_.sys_inf->os; if (os == kMacOs && (ne == kOnnxRepo || ne == kTrtLlmRepo)) { return cpp::fail("Engine " + ne + " is not supported on macOS"); @@ -84,7 +84,7 @@ cpp::result EngineService::InstallEngineAsyncV2( return cpp::fail("Engine " + ne + " is not supported on Linux"); } - auto result = DownloadEngineV2(ne, version, variant_name); + auto result = DownloadEngine(ne, version, variant_name); if (result.has_error()) { return cpp::fail(result.error()); } @@ -95,25 +95,6 @@ cpp::result EngineService::InstallEngineAsyncV2( return {}; } -cpp::result EngineService::InstallEngineAsync( - const std::string& engine, const std::string& version, - const std::string& src) { - // Although this function is called async, only download tasks are performed async - auto ne = NormalizeEngine(engine); - if (!src.empty()) { - auto res = UnzipEngine(ne, version, src); - // If has error or engine is installed successfully - if (res.has_error() || res.value()) { - return res; - } - } - auto result = DownloadEngine(ne, version, true /*async*/); - if (result.has_error()) { - return result; - } - return DownloadCuda(ne, true /*async*/); -} - cpp::result EngineService::UnzipEngine( const std::string& engine, const std::string& version, const std::string& path) { @@ -242,7 +223,7 @@ cpp::result EngineService::UninstallEngineVariant( } } -cpp::result EngineService::DownloadEngineV2( +cpp::result EngineService::DownloadEngine( const std::string& engine, const std::string& version, const std::optional variant_name) { auto normalized_version = version == "latest" @@ -377,101 +358,6 @@ cpp::result EngineService::DownloadEngineV2( return {}; } -cpp::result EngineService::DownloadEngine( - const std::string& engine, const std::string& version, bool async) { - auto res = GetEngineVariants(engine, version); - if (res.has_error()) { - return cpp::fail("Failed to fetch engine releases: " + res.error()); - } - - if (res.value().empty()) { - return cpp::fail("No release found for " + version); - } - - auto os_arch{hw_inf_.sys_inf->os + "-" + hw_inf_.sys_inf->arch}; - - std::vector variants; - for (const auto& asset : res.value()) { - variants.push_back(asset.name); - } - - CTL_INF("engine: " << engine); - CTL_INF("CUDA version: " << hw_inf_.cuda_driver_version); - auto matched_variant = GetMatchedVariant(engine, variants); - CTL_INF("Matched variant: " << matched_variant); - if (matched_variant.empty()) { - CTL_ERR("No variant found for " << os_arch); - return cpp::fail("No variant found for " + os_arch); - } - - for (const auto& asset : res.value()) { - if (asset.name == matched_variant) { - CTL_INF("Download url: " << asset.browser_download_url); - - std::filesystem::path engine_folder_path = - file_manager_utils::GetContainerFolderPath( - file_manager_utils::DownloadTypeToString(DownloadType::Engine)) / - engine; - - if (!std::filesystem::exists(engine_folder_path)) { - CTL_INF("Creating " << engine_folder_path.string()); - std::filesystem::create_directories(engine_folder_path); - } - if (IsEngineLoaded(engine)) { - CTL_INF("Engine " << engine << " is already loaded, unloading it"); - auto unload_res = UnloadEngine(engine); - if (unload_res.has_error()) { - CTL_INF("Failed to unload engine: " << unload_res.error()); - return cpp::fail(unload_res.error()); - } else { - CTL_INF("Engine " << engine << " unloaded successfully"); - } - } - CTL_INF("Engine folder path: " << engine_folder_path.string() << "\n"); - auto local_path = engine_folder_path / asset.name; - auto downloadTask{ - DownloadTask{.id = engine, - .type = DownloadType::Engine, - .items = {DownloadItem{ - .id = engine, - .downloadUrl = asset.browser_download_url, - .localPath = local_path, - }}}}; - - auto on_finished = [](const DownloadTask& finishedTask) { - // try to unzip the downloaded file - CTL_INF( - "Engine zip path: " << finishedTask.items[0].localPath.string()); - - std::filesystem::path extract_path = - finishedTask.items[0].localPath.parent_path().parent_path(); - - archive_utils::ExtractArchive(finishedTask.items[0].localPath.string(), - extract_path.string()); - - // remove the downloaded file - try { - std::filesystem::remove(finishedTask.items[0].localPath); - } catch (const std::exception& e) { - CTL_WRN("Could not delete file: " << e.what()); - } - CTL_INF("Finished!"); - }; - - if (async) { - auto res = download_service_->AddTask(downloadTask, on_finished); - if (res.has_error()) { - return cpp::fail(res.error()); - } - return true; - } else { - return download_service_->AddDownloadTask(downloadTask, on_finished); - } - } - } - return true; -} - cpp::result EngineService::DownloadCuda( const std::string& engine, bool async) { if (hw_inf_.sys_inf->os == "mac" || engine == kOnnxRepo || @@ -1032,8 +918,8 @@ cpp::result EngineService::UpdateEngine( << default_variant->variant << " is not up-to-date! Current: " << default_variant->version << ", latest: " << latest_version->name); - auto res = InstallEngineAsyncV2(engine, latest_version->tag_name, - default_variant->variant); + auto res = InstallEngineAsync(engine, latest_version->tag_name, + default_variant->variant); return EngineUpdateResult{.engine = engine, .variant = default_variant->variant, diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index b339fd7df..dee8a530b 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -4,6 +4,7 @@ #include #include #include +#include "common/engine_servicei.h" #include "cortex-common/EngineI.h" #include "cortex-common/cortexpythoni.h" #include "services/download_service.h" @@ -13,7 +14,6 @@ #include "utils/github_release_utils.h" #include "utils/result.hpp" #include "utils/system_info_utils.h" -#include "common/engine_servicei.h" struct EngineUpdateResult { std::string engine; @@ -37,7 +37,7 @@ struct SystemInfo; using EngineV = std::variant; -class EngineService: public EngineServiceI { +class EngineService : public EngineServiceI { private: using EngineRelease = github_release_utils::GitHubRelease; using EngineVariant = github_release_utils::GitHubAsset; @@ -69,17 +69,13 @@ class EngineService: public EngineServiceI { */ cpp::result IsEngineReady(const std::string& engine) const; - cpp::result InstallEngineAsync( - const std::string& engine, const std::string& version = "latest", - const std::string& src = ""); - /** * Handling install engine variant. * * If no version provided, choose `latest`. * If no variant provided, automatically pick the best variant. */ - cpp::result InstallEngineAsyncV2( + cpp::result InstallEngineAsync( const std::string& engine, const std::string& version, const std::optional variant_name); @@ -125,11 +121,7 @@ class EngineService: public EngineServiceI { const std::string& engine); private: - cpp::result DownloadEngine( - const std::string& engine, const std::string& version = "latest", - bool async = false); - - cpp::result DownloadEngineV2( + cpp::result DownloadEngine( const std::string& engine, const std::string& version = "latest", const std::optional variant_name = std::nullopt);