diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index 45c7235c7..c6e127436 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -106,36 +106,50 @@ jobs:
           cd engine
           make run-unit-tests
 
-      # - name: Run e2e tests
-      #   if: runner.os != 'Windows' && github.event.pull_request.draft == false
-      #   run: |
-      #     cd engine
-      #     cp build/cortex build/cortex-nightly
-      #     cp build/cortex build/cortex-beta
-      #     python -m pip install --upgrade pip
-      #     python -m pip install pytest
-      #     python -m pip install requests
-      #     python e2e-test/main.py
-      #     rm build/cortex-nightly
-      #     rm build/cortex-beta
-      #   env:
-      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-
-      # - name: Run e2e tests
-      #   if: runner.os == 'Windows' && github.event.pull_request.draft == false
-      #   run: |
-      #     cd engine
-      #     cp build/cortex.exe build/cortex-nightly.exe
-      #     cp build/cortex.exe build/cortex-beta.exe
-      #     python -m pip install --upgrade pip
-      #     python -m pip install pytest
-      #     python -m pip install requests
-      #     python e2e-test/main.py
-      #     rm build/cortex-nightly.exe
-      #     rm build/cortex-beta.exe
-      #   env:
-      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Run setup config for macos
+        if: runner.os == 'macOS'
+        run: |
+          cd engine
+          ./build/cortex --version
+          sed -i '' 's/huggingFaceToken: ""/huggingFaceToken: "${{ secrets.HUGGINGFACE_TOKEN_READ }}"/' ~/.cortexrc
+
+      - name: Run setup config for linux
+        if: runner.os != 'macOS'
+        shell: bash
+        run: |
+          cd engine
+          ./build/cortex --version
+          sed -i 's/huggingFaceToken: ""/huggingFaceToken: "${{ secrets.HUGGINGFACE_TOKEN_READ }}"/' ~/.cortexrc
+
+      - name: Run e2e tests
+        if: runner.os != 'Windows' && github.event.pull_request.draft == false
+        run: |
+          cd engine
+          cp build/cortex build/cortex-nightly
+          cp build/cortex build/cortex-beta
+          python -m pip install --upgrade pip
+          python -m pip install pytest
+          python -m pip install requests
+          python e2e-test/main.py
+          rm build/cortex-nightly
+          rm build/cortex-beta
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Run e2e tests
+        if: runner.os == 'Windows' && github.event.pull_request.draft == false
+        run: |
+          cd engine
+          cp build/cortex.exe build/cortex-nightly.exe
+          cp build/cortex.exe build/cortex-beta.exe
+          python -m pip install --upgrade pip
+          python -m pip install pytest
+          python -m pip install requests
+          python e2e-test/main.py
+          rm build/cortex-nightly.exe
+          rm build/cortex-beta.exe
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Pre-package
         run: |
diff --git a/.github/workflows/template-build-linux-x64.yml b/.github/workflows/template-build-linux-x64.yml
index 0cbde59af..d1ca73844 100644
--- a/.github/workflows/template-build-linux-x64.yml
+++ b/.github/workflows/template-build-linux-x64.yml
@@ -135,20 +135,6 @@ jobs:
         uses: actions/setup-python@v4
         with:
           python-version: '3.10'
-
-      # - name: Run e2e tests
-      #   run: |
-      #     cd engine
-      #     cp build/cortex build/cortex-nightly
-      #     cp build/cortex build/cortex-beta
-      #     python -m pip install --upgrade pip
-      #     python -m pip install pytest
-      #     python -m pip install requests
-      #     python e2e-test/main.py
-      #     rm build/cortex-nightly
-      #     rm build/cortex-beta
-      #   env:
-      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Pre-package
         run: |
diff --git a/.github/workflows/template-build-macos.yml b/.github/workflows/template-build-macos.yml
index b8a02aec9..371468dfb 100644
--- a/.github/workflows/template-build-macos.yml
+++ b/.github/workflows/template-build-macos.yml
@@ -149,20 +149,6 @@ jobs:
         run: |
           cd engine
           make build CMAKE_EXTRA_FLAGS="${{ inputs.cmake-flags }} ${{ matrix.extra-cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ inputs.build-deps-cmake-flags }}"
-
-      # - name: Run e2e tests
-      #   run: |
-      #     cd engine
-      #     cp build/cortex build/cortex-nightly
-      #     cp build/cortex build/cortex-beta
-      #     python -m pip install --upgrade pip
-      #     python -m pip install pytest
-      #     python -m pip install requests
-      #     python e2e-test/main.py
-      #     rm build/cortex-nightly
-      #     rm build/cortex-beta
-      #   env:
-      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Pre-package
         run: |
diff --git a/.github/workflows/template-build-windows-x64.yml b/.github/workflows/template-build-windows-x64.yml
index 98576bd0b..d1f6f1333 100644
--- a/.github/workflows/template-build-windows-x64.yml
+++ b/.github/workflows/template-build-windows-x64.yml
@@ -172,20 +172,6 @@ jobs:
         with:
           python-version: '3.10'
 
-      # - name: Run e2e tests
-      #   run: |
-      #     cd engine
-      #     cp build/cortex.exe build/cortex-nightly.exe
-      #     cp build/cortex.exe build/cortex-beta.exe
-      #     python -m pip install --upgrade pip
-      #     python -m pip install pytest
-      #     python -m pip install requests
-      #     python e2e-test/main.py
-      #     rm build/cortex-nightly.exe
-      #     rm build/cortex-beta.exe
-      #   env:
-      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
       - name: Pre-package
         run: |
           cd engine
diff --git a/engine/cli/commands/engine_uninstall_cmd.cc b/engine/cli/commands/engine_uninstall_cmd.cc
index 6953e2dea..ebd9eb869 100644
--- a/engine/cli/commands/engine_uninstall_cmd.cc
+++ b/engine/cli/commands/engine_uninstall_cmd.cc
@@ -21,7 +21,7 @@ void EngineUninstallCmd::Exec(const std::string& host, int port,
   auto res = cli.Delete("/v1/engines/" + engine);
   if (res) {
     if (res->status == httplib::StatusCode::OK_200) {
-      CLI_LOG("Engine " + engine + " uninstalled successfully");
+      CLI_LOG("Engine " + engine + " uninstalled successfully!");
     } else {
       CTL_ERR("Engine failed to uninstall with status code: " << res->status);
     }
diff --git a/engine/e2e-test/test_api_model_pull_direct_url.py b/engine/e2e-test/test_api_model_pull_direct_url.py
index aa78f4026..7f13f6b18 100644
--- a/engine/e2e-test/test_api_model_pull_direct_url.py
+++ b/engine/e2e-test/test_api_model_pull_direct_url.py
@@ -22,7 +22,6 @@ def setup_and_teardown(self):
         yield
 
         # Teardown
-        stop_server()
         run(
             "Delete model",
             [
@@ -31,6 +30,7 @@ def setup_and_teardown(self):
                 "TheBloke:TinyLlama-1.1B-Chat-v0.3-GGUF:tinyllama-1.1b-chat-v0.3.Q2_K.gguf",
             ],
         )
+        stop_server()
 
     def test_model_pull_with_direct_url_should_be_success(self):
         myobj = {
diff --git a/engine/e2e-test/test_api_model_start.py b/engine/e2e-test/test_api_model_start.py
index fe2a80ceb..906d4b0cf 100644
--- a/engine/e2e-test/test_api_model_start.py
+++ b/engine/e2e-test/test_api_model_start.py
@@ -15,7 +15,7 @@ def setup_and_teardown(self):
         # TODO: using pull with branch for easy testing tinyllama:gguf for example
         run("Delete model", ["models", "delete", "tinyllama:gguf"])
-        popen(["pull", "tinyllama"], "1\n")
+        run("Pull model", ["pull", "tinyllama:gguf"], timeout=None,)
 
         yield
 
diff --git a/engine/e2e-test/test_api_model_update.py b/engine/e2e-test/test_api_model_update.py
index 8d28d412a..cf35f44f9 100644
--- a/engine/e2e-test/test_api_model_update.py
+++ b/engine/e2e-test/test_api_model_update.py
@@ -19,5 +19,5 @@ def setup_and_teardown(self):
 
     def test_models_update_should_be_successful(self):
         body_json = {'model': 'tinyllama:gguf'}
-        response = requests.post("http://localhost:3928/models/tinyllama:gguf", json = body_json)
+        response = requests.patch("http://localhost:3928/models/tinyllama:gguf", json = body_json)
         assert response.status_code == 200
diff --git a/engine/e2e-test/test_cli_engine_get.py b/engine/e2e-test/test_cli_engine_get.py
index dd3dffbde..d783c3421 100644
--- a/engine/e2e-test/test_cli_engine_get.py
+++ b/engine/e2e-test/test_cli_engine_get.py
@@ -2,9 +2,21 @@
 import pytest
 from test_runner import run
-
+from test_runner import start_server, stop_server
 
 class TestCliEngineGet:
+
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
+        yield
+
+        # Teardown
+        stop_server()
 
     @pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test")
     def test_engines_get_tensorrt_llm_should_not_be_incompatible(self):
diff --git a/engine/e2e-test/test_cli_engine_list.py b/engine/e2e-test/test_cli_engine_list.py
index 10f7470be..ede2879d9 100644
--- a/engine/e2e-test/test_cli_engine_list.py
+++ b/engine/e2e-test/test_cli_engine_list.py
@@ -2,9 +2,22 @@
 import pytest
 from test_runner import run
-
+from test_runner import start_server, stop_server
 
 class TestCliEngineList:
+
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
+        yield
+
+        # Teardown
+        stop_server()
+
     @pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test")
     def test_engines_list_run_successfully_on_windows(self):
         exit_code, output, error = run("List engines", ["engines", "list"])
diff --git a/engine/e2e-test/test_cli_engine_uninstall.py b/engine/e2e-test/test_cli_engine_uninstall.py
index c53b6f922..5190cee7a 100644
--- a/engine/e2e-test/test_cli_engine_uninstall.py
+++ b/engine/e2e-test/test_cli_engine_uninstall.py
@@ -1,12 +1,16 @@
 import pytest
 from test_runner import run
-
+from test_runner import start_server, stop_server
 
 class TestCliEngineUninstall:
 
     @pytest.fixture(autouse=True)
     def setup_and_teardown(self):
         # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
         # Preinstall llamacpp engine
         run("Install Engine", ["engines", "install", "llama-cpp"],timeout = None)
 
@@ -15,6 +19,7 @@ def setup_and_teardown(self):
         # Teardown
         # Clean up, removing installed engine
         run("Uninstall Engine", ["engines", "uninstall", "llama-cpp"])
+        stop_server()
 
     def test_engines_uninstall_llamacpp_should_be_successfully(self):
         exit_code, output, error = run(
diff --git a/engine/e2e-test/test_cli_model_delete.py b/engine/e2e-test/test_cli_model_delete.py
index 3ff7ef61d..f7ab53058 100644
--- a/engine/e2e-test/test_cli_model_delete.py
+++ b/engine/e2e-test/test_cli_model_delete.py
@@ -1,22 +1,26 @@
 import pytest
 from test_runner import popen, run
-
+from test_runner import start_server, stop_server
 
 class TestCliModelDelete:
 
     @pytest.fixture(autouse=True)
     def setup_and_teardown(self):
         # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
         # Pull model
-        # TODO: using pull with branch for easy testing tinyllama:gguf for example
-        popen(["pull", "tinyllama"], "1\n")
+        run("Pull model", ["pull", "tinyllama:gguf"], timeout=None,)
 
         yield
 
         # Teardown
         # Clean up
         run("Delete model", ["models", "delete", "tinyllama:gguf"])
+        stop_server()
 
     def test_models_delete_should_be_successful(self):
         exit_code, output, error = run(
diff --git a/engine/e2e-test/test_cli_model_import.py b/engine/e2e-test/test_cli_model_import.py
index 1f54ae511..cf94d1a2a 100644
--- a/engine/e2e-test/test_cli_model_import.py
+++ b/engine/e2e-test/test_cli_model_import.py
@@ -1,7 +1,20 @@
 import pytest
 from test_runner import run
+from test_runner import start_server, stop_server
 
 class TestCliModelImport:
+
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
+        yield
+
+        # Teardown
+        stop_server()
 
     @pytest.mark.skipif(True, reason="Expensive test. Only test when you have local gguf file.")
     def test_model_import_should_be_success(self):
diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc
index 831c20f7d..9c955dba2 100644
--- a/engine/services/download_service.cc
+++ b/engine/services/download_service.cc
@@ -9,7 +9,10 @@
 #include
 #include "download_service.h"
 #include "utils/format_utils.h"
+#include "utils/huggingface_utils.h"
+#include "utils/logging_utils.h"
 #include "utils/result.hpp"
+#include "utils/url_parser.h"
 
 #ifdef _WIN32
 #define ftell64(f) _ftelli64(f)
@@ -24,6 +27,20 @@ size_t WriteCallback(char* ptr, size_t size, size_t nmemb, void* userdata) {
   size_t written = fwrite(ptr, size, nmemb, (FILE*)userdata);
   return written;
 }
+
+inline curl_slist* CreateHeaders(const std::string& url) {
+  try {
+    auto url_obj = url_parser::FromUrlString(url);
+    if (url_obj.host == huggingface_utils::kHuggingfaceHost) {
+      return huggingface_utils::CreateCurlHfHeaders();
+    } else {
+      return nullptr;
+    }
+  } catch (const std::exception& e) {
+    CTL_WRN(e.what());
+    return nullptr;
+  }
+}
 }  // namespace
 
 cpp::result DownloadService::VerifyDownloadTask(
@@ -98,6 +115,9 @@ cpp::result DownloadService::GetFileSize(
   curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
   curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
   curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
+  if (auto headers = CreateHeaders(url); headers) {
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+  }
 
   CURLcode res = curl_easy_perform(curl);
   if (res != CURLE_OK) {
@@ -176,6 +196,9 @@ cpp::result DownloadService::Download(
   }
 
   curl_easy_setopt(curl, CURLOPT_URL, download_item.downloadUrl.c_str());
+  if (auto headers = CreateHeaders(download_item.downloadUrl); headers) {
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+  }
   curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &WriteCallback);
   curl_easy_setopt(curl, CURLOPT_WRITEDATA, file);
   curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
@@ -262,6 +285,9 @@ void DownloadService::ProcessTask(DownloadTask& task) {
     return;
   }
   downloading_data_->item_id = item.id;
+  if (auto headers = CreateHeaders(item.downloadUrl); headers) {
+    curl_easy_setopt(handle, CURLOPT_HTTPHEADER, headers);
+  }
   curl_easy_setopt(handle, CURLOPT_URL, item.downloadUrl.c_str());
   curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, WriteCallback);
   curl_easy_setopt(handle, CURLOPT_WRITEDATA, file);
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index 62cc92e5b..3db54d997 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -70,7 +70,8 @@ cpp::result GetDownloadTask(
       .pathParams = {"api", "models", "cortexso", modelId, "tree", branch}};
 
   httplib::Client cli(url.GetProtocolAndHost());
-  auto res = cli.Get(url.GetPathAndQuery());
+  auto res =
+      cli.Get(url.GetPathAndQuery(), huggingface_utils::CreateHttpHfHeaders());
   if (res->status != httplib::StatusCode::OK_200) {
     return cpp::fail("Model " + modelId + " not found");
   }
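Aside, not part of the patch: every libcurl call site above follows the same pattern. `CreateHeaders()` builds a `curl_slist` only when the URL's host is huggingface.co, and the list is attached with `CURLOPT_HTTPHEADER` before the transfer runs. A minimal self-contained sketch of that pattern follows; the token and URL are placeholder values, since the real code reads `huggingFaceToken` from `~/.cortexrc` via `file_manager_utils::GetCortexConfig()`.

```cpp
// Sketch only: the token value and URL below are placeholders.
#include <curl/curl.h>

#include <string>

int main() {
  curl_global_init(CURL_GLOBAL_DEFAULT);
  CURL* curl = curl_easy_init();
  if (!curl)
    return 1;

  // Equivalent of CreateCurlHfHeaders(): a linked list of header strings.
  std::string token = "hf_xxx";  // placeholder; really read from ~/.cortexrc
  curl_slist* headers = nullptr;
  std::string auth = "Authorization: Bearer " + token;
  headers = curl_slist_append(headers, auth.c_str());
  headers = curl_slist_append(headers, "Content-Type: application/json");

  curl_easy_setopt(curl, CURLOPT_URL,
                   "https://huggingface.co/api/models/cortexso/tinyllama");
  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);  // attach the list
  CURLcode res = curl_easy_perform(curl);

  // The list must outlive the transfer, then be freed explicitly.
  curl_slist_free_all(headers);
  curl_easy_cleanup(curl);
  curl_global_cleanup();
  return res == CURLE_OK ? 0 : 1;
}
```

One review note: libcurl expects each `curl_slist` to be released with `curl_slist_free_all()` after the transfer, and no matching free is visible in the hunks shown here, so the lifetime of these lists may be worth double-checking.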
diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h
index e0e893f42..f61640db3 100644
--- a/engine/utils/config_yaml_utils.h
+++ b/engine/utils/config_yaml_utils.h
@@ -18,6 +18,7 @@ struct CortexConfig {
   std::string apiServerPort;
   uint64_t checkedForUpdateAt;
   std::string latestRelease;
+  std::string huggingFaceToken;
 };
 
 const std::string kCortexFolderName = "cortexcpp";
@@ -47,6 +48,7 @@ inline void DumpYamlConfig(const CortexConfig& config,
   node["apiServerPort"] = config.apiServerPort;
   node["checkedForUpdateAt"] = config.checkedForUpdateAt;
   node["latestRelease"] = config.latestRelease;
+  node["huggingFaceToken"] = config.huggingFaceToken;
 
   out_file << node;
   out_file.close();
@@ -70,7 +72,8 @@ inline CortexConfig FromYaml(const std::string& path,
       !node["maxLogLines"] || !node["apiServerHost"] ||
       !node["apiServerPort"] || !node["checkedForUpdateAt"] ||
       !node["latestRelease"] || !node["logLlamaCppPath"] ||
-      !node["logOnnxPath"] || !node["logTensorrtLLMPath"]);
+      !node["logOnnxPath"] || !node["logTensorrtLLMPath"] ||
+      !node["huggingFaceToken"]);
 
   CortexConfig config = {
       .logFolderPath = node["logFolderPath"]
@@ -102,6 +105,7 @@ inline CortexConfig FromYaml(const std::string& path,
       .latestRelease = node["latestRelease"]
                            ? node["latestRelease"].as<std::string>()
                            : default_cfg.latestRelease,
+      .huggingFaceToken = node["huggingFaceToken"] ? node["huggingFaceToken"].as<std::string>() : "",
   };
   if (should_update_config) {
     DumpYamlConfig(config, path);
diff --git a/engine/utils/curl_utils.h b/engine/utils/curl_utils.h
index 2640bdc9b..b52030726 100644
--- a/engine/utils/curl_utils.h
+++ b/engine/utils/curl_utils.h
@@ -14,7 +14,8 @@ size_t WriteCallback(void* contents, size_t size, size_t nmemb,
 }
 }  // namespace
 
-inline cpp::result<std::string, std::string> SimpleGet(const std::string& url) {
+inline cpp::result<std::string, std::string> SimpleGet(
+    const std::string& url, curl_slist* headers = nullptr) {
   CURL* curl;
   CURLcode res;
   std::string readBuffer;
@@ -27,6 +28,9 @@
     return cpp::fail("Failed to init CURL");
   }
   curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
+  if(headers) {
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+  }
 
   // Set write function callback and data buffer
   curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
@@ -45,8 +49,8 @@
 }
 
 inline cpp::result<YAML::Node, std::string> ReadRemoteYaml(
-    const std::string& url) {
-  auto result = SimpleGet(url);
+    const std::string& url, curl_slist* headers = nullptr) {
+  auto result = SimpleGet(url, headers);
   if (result.has_error()) {
     return cpp::fail(result.error());
   }
@@ -60,8 +64,8 @@
 }
 
 inline cpp::result<Json::Value, std::string> SimpleGetJson(
-    const std::string& url) {
-  auto result = SimpleGet(url);
+    const std::string& url, curl_slist* headers = nullptr) {
+  auto result = SimpleGet(url, headers);
   if (result.has_error()) {
     return cpp::fail(result.error());
   }
diff --git a/engine/utils/huggingface_utils.h b/engine/utils/huggingface_utils.h
index 97e948866..ab85948e7 100644
--- a/engine/utils/huggingface_utils.h
+++ b/engine/utils/huggingface_utils.h
@@ -3,7 +3,9 @@
 #include
 #include
 #include
+#include "httplib.h"
 #include "utils/curl_utils.h"
+#include "utils/file_manager_utils.h"
 #include "utils/result.hpp"
 #include "utils/url_parser.h"
 
@@ -46,6 +48,33 @@ struct HuggingFaceModelRepoInfo {
   std::string createdAt;
 };
 
+inline std::optional<std::string> GetHuggingFaceToken() {
+  auto const& token = file_manager_utils::GetCortexConfig().huggingFaceToken;
+  if (token.empty())
+    return std::nullopt;
+  return token;
+}
+
+inline curl_slist* CreateCurlHfHeaders() {
+  struct curl_slist* headers = nullptr;
+  auto hf_token = GetHuggingFaceToken();
+  if (hf_token) {
+    std::string auth_header = "Authorization: Bearer " + hf_token.value();
+    headers = curl_slist_append(headers, auth_header.c_str());
+    headers = curl_slist_append(headers, "Content-Type: application/json");
+  }
+  return headers;
+}
+
+inline httplib::Headers CreateHttpHfHeaders() {
+  httplib::Headers headers;
+  auto token = GetHuggingFaceToken();
+  if (token) {
+    headers.emplace("Authorization", "Bearer " + token.value());
+  }
+  return headers;
+}
+
 inline cpp::result, std::string>
 GetModelRepositoryBranches(const std::string& author,
@@ -58,7 +87,8 @@ GetModelRepositoryBranches(const std::string& author,
       .host = kHuggingfaceHost,
       .pathParams = {"api", "models", author, modelName, "refs"}};
 
-  auto result = curl_utils::SimpleGetJson(url_obj.ToFullPath());
+  auto result =
+      curl_utils::SimpleGetJson(url_obj.ToFullPath(), CreateCurlHfHeaders());
   if (result.has_error()) {
     return cpp::fail("Failed to get model repository branches: " + author +
                      "/" + modelName);
@@ -90,7 +120,8 @@ GetHuggingFaceModelRepoInfo(const std::string& author,
       .host = kHuggingfaceHost,
       .pathParams = {"api", "models", author, modelName}};
 
-  auto result = curl_utils::SimpleGetJson(url_obj.ToFullPath());
+  auto result =
+      curl_utils::SimpleGetJson(url_obj.ToFullPath(), CreateCurlHfHeaders());
   if (result.has_error()) {
     return cpp::fail("Failed to get model repository info: " + author + "/" +
                      modelName);
@@ -162,8 +193,8 @@ inline std::string GetDownloadableUrl(const std::string& author,
 
 inline std::optional<std::string> GetDefaultBranch(
     const std::string& model_name) {
-  auto default_model_branch =
-      curl_utils::ReadRemoteYaml(GetMetadataUrl(model_name));
+  auto default_model_branch = curl_utils::ReadRemoteYaml(
+      GetMetadataUrl(model_name), CreateCurlHfHeaders());
   if (default_model_branch.has_error()) {
     return std::nullopt;
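Aside, not part of the patch: the cpp-httplib call sites (e.g. `GetDownloadTask` in model_service.cc) take the second shape, `CreateHttpHfHeaders()`, which returns an `httplib::Headers` multimap that httplib serializes into the request. A rough standalone sketch under the same assumptions follows; the token is a placeholder, and an https client requires cpp-httplib built with `CPPHTTPLIB_OPENSSL_SUPPORT`.

```cpp
// Sketch only: mirrors CreateHttpHfHeaders() plus the model_service.cc call.
#include <httplib.h>

#include <string>

int main() {
  std::string token = "hf_xxx";  // placeholder; really read from ~/.cortexrc
  httplib::Headers headers;
  if (!token.empty()) {  // an empty token falls back to an anonymous request
    headers.emplace("Authorization", "Bearer " + token);
  }
  httplib::Client cli("https://huggingface.co");
  // Model and branch below are example values for the cortexso tree lookup.
  auto res = cli.Get("/api/models/cortexso/tinyllama/tree/main", headers);
  return (res && res->status == 200) ? 0 : 1;
}
```

This mirrors why `GetHuggingFaceToken()` gates on `token.empty()`: with no `huggingFaceToken` set in `~/.cortexrc`, both header builders produce an empty header set and requests stay anonymous, so pulls of public models keep working without a token.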