diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index 45c7235c7..c6e127436 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -106,36 +106,50 @@ jobs:
           cd engine
           make run-unit-tests
 
-      # - name: Run e2e tests
-      #   if: runner.os != 'Windows' && github.event.pull_request.draft == false
-      #   run: |
-      #     cd engine
-      #     cp build/cortex build/cortex-nightly
-      #     cp build/cortex build/cortex-beta
-      #     python -m pip install --upgrade pip
-      #     python -m pip install pytest
-      #     python -m pip install requests
-      #     python e2e-test/main.py
-      #     rm build/cortex-nightly
-      #     rm build/cortex-beta
-      #   env:
-      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-
-      # - name: Run e2e tests
-      #   if: runner.os == 'Windows' && github.event.pull_request.draft == false
-      #   run: |
-      #     cd engine
-      #     cp build/cortex.exe build/cortex-nightly.exe
-      #     cp build/cortex.exe build/cortex-beta.exe
-      #     python -m pip install --upgrade pip
-      #     python -m pip install pytest
-      #     python -m pip install requests
-      #     python e2e-test/main.py
-      #     rm build/cortex-nightly.exe
-      #     rm build/cortex-beta.exe
-      #   env:
-      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Run setup config for macos
+        if: runner.os == 'macOS'
+        run: |
+          cd engine
+          ./build/cortex --version
+          sed -i '' 's/huggingFaceToken: ""/huggingFaceToken: "${{ secrets.HUGGINGFACE_TOKEN_READ }}"/' ~/.cortexrc
+
+      - name: Run setup config for linux
+        if: runner.os != 'macOS'
+        shell: bash
+        run: |
+          cd engine
+          ./build/cortex --version
+          sed -i 's/huggingFaceToken: ""/huggingFaceToken: "${{ secrets.HUGGINGFACE_TOKEN_READ }}"/' ~/.cortexrc
+
+      - name: Run e2e tests
+        if: runner.os != 'Windows' && github.event.pull_request.draft == false
+        run: |
+          cd engine
+          cp build/cortex build/cortex-nightly
+          cp build/cortex build/cortex-beta
+          python -m pip install --upgrade pip
+          python -m pip install pytest
+          python -m pip install requests
+          python e2e-test/main.py
+          rm build/cortex-nightly
+          rm build/cortex-beta
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Run e2e tests
+        if: runner.os == 'Windows' && github.event.pull_request.draft == false
+        run: |
+          cd engine
+          cp build/cortex.exe build/cortex-nightly.exe
+          cp build/cortex.exe build/cortex-beta.exe
+          python -m pip install --upgrade pip
+          python -m pip install pytest
+          python -m pip install requests
+          python e2e-test/main.py
+          rm build/cortex-nightly.exe
+          rm build/cortex-beta.exe
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Pre-package
         run: |
diff --git a/.github/workflows/template-build-linux-x64.yml b/.github/workflows/template-build-linux-x64.yml
index 0cbde59af..d1ca73844 100644
--- a/.github/workflows/template-build-linux-x64.yml
+++ b/.github/workflows/template-build-linux-x64.yml
@@ -135,20 +135,6 @@ jobs:
         uses: actions/setup-python@v4
         with:
           python-version: '3.10'
-
-      # - name: Run e2e tests
-      #   run: |
-      #     cd engine
-      #     cp build/cortex build/cortex-nightly
-      #     cp build/cortex build/cortex-beta
-      #     python -m pip install --upgrade pip
-      #     python -m pip install pytest
-      #     python -m pip install requests
-      #     python e2e-test/main.py
-      #     rm build/cortex-nightly
-      #     rm build/cortex-beta
-      #   env:
-      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Pre-package
         run: |
diff --git a/.github/workflows/template-build-macos.yml b/.github/workflows/template-build-macos.yml
index b8a02aec9..371468dfb 100644
--- a/.github/workflows/template-build-macos.yml
+++ b/.github/workflows/template-build-macos.yml
@@ -149,20 +149,6 @@ jobs:
         run: |
           cd engine
           make build CMAKE_EXTRA_FLAGS="${{ inputs.cmake-flags }} ${{ matrix.extra-cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ inputs.build-deps-cmake-flags }}"
-
-      # - name: Run e2e tests
-      #   run: |
-      #     cd engine
-      #     cp build/cortex build/cortex-nightly
-      #     cp build/cortex build/cortex-beta
-      #     python -m pip install --upgrade pip
-      #     python -m pip install pytest
-      #     python -m pip install requests
-      #     python e2e-test/main.py
-      #     rm build/cortex-nightly
-      #     rm build/cortex-beta
-      #   env:
-      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Pre-package
         run: |
diff --git a/.github/workflows/template-build-windows-x64.yml b/.github/workflows/template-build-windows-x64.yml
index 98576bd0b..d1f6f1333 100644
--- a/.github/workflows/template-build-windows-x64.yml
+++ b/.github/workflows/template-build-windows-x64.yml
@@ -172,20 +172,6 @@ jobs:
         with:
           python-version: '3.10'
 
-      # - name: Run e2e tests
-      #   run: |
-      #     cd engine
-      #     cp build/cortex.exe build/cortex-nightly.exe
-      #     cp build/cortex.exe build/cortex-beta.exe
-      #     python -m pip install --upgrade pip
-      #     python -m pip install pytest
-      #     python -m pip install requests
-      #     python e2e-test/main.py
-      #     rm build/cortex-nightly.exe
-      #     rm build/cortex-beta.exe
-      #   env:
-      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
       - name: Pre-package
         run: |
           cd engine
diff --git a/engine/cli/commands/engine_uninstall_cmd.cc b/engine/cli/commands/engine_uninstall_cmd.cc
index 6953e2dea..ebd9eb869 100644
--- a/engine/cli/commands/engine_uninstall_cmd.cc
+++ b/engine/cli/commands/engine_uninstall_cmd.cc
@@ -21,7 +21,7 @@ void EngineUninstallCmd::Exec(const std::string& host, int port,
   auto res = cli.Delete("/v1/engines/" + engine);
   if (res) {
     if (res->status == httplib::StatusCode::OK_200) {
-      CLI_LOG("Engine " + engine + " uninstalled successfully");
+      CLI_LOG("Engine " + engine + " uninstalled successfully!");
     } else {
       CTL_ERR("Engine failed to uninstall with status code: " << res->status);
     }
diff --git a/engine/e2e-test/test_api_model_pull_direct_url.py b/engine/e2e-test/test_api_model_pull_direct_url.py
index aa78f4026..7f13f6b18 100644
--- a/engine/e2e-test/test_api_model_pull_direct_url.py
+++ b/engine/e2e-test/test_api_model_pull_direct_url.py
@@ -22,7 +22,6 @@ def setup_and_teardown(self):
         yield
 
         # Teardown
-        stop_server()
         run(
             "Delete model",
             [
@@ -31,6 +30,7 @@ def setup_and_teardown(self):
                 "TheBloke:TinyLlama-1.1B-Chat-v0.3-GGUF:tinyllama-1.1b-chat-v0.3.Q2_K.gguf",
             ],
         )
+        stop_server()
 
     def test_model_pull_with_direct_url_should_be_success(self):
         myobj = {
diff --git a/engine/e2e-test/test_api_model_start.py b/engine/e2e-test/test_api_model_start.py
index fe2a80ceb..906d4b0cf 100644
--- a/engine/e2e-test/test_api_model_start.py
+++ b/engine/e2e-test/test_api_model_start.py
@@ -15,7 +15,7 @@ def setup_and_teardown(self):
         # TODO: using pull with branch for easy testing tinyllama:gguf for example
         run("Delete model", ["models", "delete", "tinyllama:gguf"])
-        popen(["pull", "tinyllama"], "1\n")
+        run("Pull model", ["pull", "tinyllama:gguf"], timeout=None,)
 
         yield
 
diff --git a/engine/e2e-test/test_api_model_update.py b/engine/e2e-test/test_api_model_update.py
index 8d28d412a..cf35f44f9 100644
--- a/engine/e2e-test/test_api_model_update.py
+++ b/engine/e2e-test/test_api_model_update.py
@@ -19,5 +19,5 @@ def setup_and_teardown(self):
 
     def test_models_update_should_be_successful(self):
         body_json = {'model': 'tinyllama:gguf'}
-        response = requests.post("http://localhost:3928/models/tinyllama:gguf", json = body_json)
+        response = requests.patch("http://localhost:3928/models/tinyllama:gguf", json = body_json)
         assert response.status_code == 200
diff --git a/engine/e2e-test/test_cli_engine_get.py b/engine/e2e-test/test_cli_engine_get.py
index dd3dffbde..d783c3421 100644
--- a/engine/e2e-test/test_cli_engine_get.py
+++ b/engine/e2e-test/test_cli_engine_get.py
@@ -2,9 +2,21 @@
 import pytest
 from test_runner import run
-
+from test_runner import start_server, stop_server
 
 class TestCliEngineGet:
+
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
+        yield
+
+        # Teardown
+        stop_server()
 
     @pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test")
     def test_engines_get_tensorrt_llm_should_not_be_incompatible(self):
diff --git a/engine/e2e-test/test_cli_engine_list.py b/engine/e2e-test/test_cli_engine_list.py
index 10f7470be..ede2879d9 100644
--- a/engine/e2e-test/test_cli_engine_list.py
+++ b/engine/e2e-test/test_cli_engine_list.py
@@ -2,9 +2,22 @@
 import pytest
 from test_runner import run
-
+from test_runner import start_server, stop_server
 
 class TestCliEngineList:
+
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
+        yield
+
+        # Teardown
+        stop_server()
+
     @pytest.mark.skipif(platform.system() != "Windows", reason="Windows-specific test")
     def test_engines_list_run_successfully_on_windows(self):
         exit_code, output, error = run("List engines", ["engines", "list"])
diff --git a/engine/e2e-test/test_cli_engine_uninstall.py b/engine/e2e-test/test_cli_engine_uninstall.py
index c53b6f922..5190cee7a 100644
--- a/engine/e2e-test/test_cli_engine_uninstall.py
+++ b/engine/e2e-test/test_cli_engine_uninstall.py
@@ -1,12 +1,16 @@
 import pytest
 from test_runner import run
-
+from test_runner import start_server, stop_server
 
 class TestCliEngineUninstall:
 
     @pytest.fixture(autouse=True)
     def setup_and_teardown(self):
         # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
         # Preinstall llamacpp engine
         run("Install Engine", ["engines", "install", "llama-cpp"],timeout = None)
 
@@ -15,6 +19,7 @@ def setup_and_teardown(self):
         # Teardown
         # Clean up, removing installed engine
         run("Uninstall Engine", ["engines", "uninstall", "llama-cpp"])
+        stop_server()
 
     def test_engines_uninstall_llamacpp_should_be_successfully(self):
         exit_code, output, error = run(
diff --git a/engine/e2e-test/test_cli_model_delete.py b/engine/e2e-test/test_cli_model_delete.py
index 3ff7ef61d..f7ab53058 100644
--- a/engine/e2e-test/test_cli_model_delete.py
+++ b/engine/e2e-test/test_cli_model_delete.py
@@ -1,22 +1,26 @@
 import pytest
 from test_runner import popen, run
-
+from test_runner import start_server, stop_server
 
 class TestCliModelDelete:
 
     @pytest.fixture(autouse=True)
     def setup_and_teardown(self):
         # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
         # Pull model
-        # TODO: using pull with branch for easy testing tinyllama:gguf for example
-        popen(["pull", "tinyllama"], "1\n")
+        run("Pull model", ["pull", "tinyllama:gguf"], timeout=None,)
 
         yield
 
         # Teardown
         # Clean up
         run("Delete model", ["models", "delete", "tinyllama:gguf"])
+        stop_server()
 
     def test_models_delete_should_be_successful(self):
         exit_code, output, error = run(
diff --git a/engine/e2e-test/test_cli_model_import.py b/engine/e2e-test/test_cli_model_import.py
index 1f54ae511..cf94d1a2a 100644
--- a/engine/e2e-test/test_cli_model_import.py
+++ b/engine/e2e-test/test_cli_model_import.py
@@ -1,7 +1,20 @@
 import pytest
 from test_runner import run
+from test_runner import start_server, stop_server
 
 class TestCliModelImport:
+
+    @pytest.fixture(autouse=True)
+    def setup_and_teardown(self):
+        # Setup
+        success = start_server()
+        if not success:
+            raise Exception("Failed to start server")
+
+        yield
+
+        # Teardown
+        stop_server()
 
     @pytest.mark.skipif(True, reason="Expensive test. Only test when you have local gguf file.")
     def test_model_import_should_be_success(self):
diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc
index 831c20f7d..9c955dba2 100644
--- a/engine/services/download_service.cc
+++ b/engine/services/download_service.cc
@@ -9,7 +9,10 @@
 #include
 #include "download_service.h"
 #include "utils/format_utils.h"
+#include "utils/huggingface_utils.h"
+#include "utils/logging_utils.h"
 #include "utils/result.hpp"
+#include "utils/url_parser.h"
 
 #ifdef _WIN32
 #define ftell64(f) _ftelli64(f)
@@ -24,6 +27,20 @@ size_t WriteCallback(char* ptr, size_t size, size_t nmemb, void* userdata) {
   size_t written = fwrite(ptr, size, nmemb, (FILE*)userdata);
   return written;
 }
+
+inline curl_slist* CreateHeaders(const std::string& url) {
+  try {
+    auto url_obj = url_parser::FromUrlString(url);
+    if (url_obj.host == huggingface_utils::kHuggingfaceHost) {
+      return huggingface_utils::CreateCurlHfHeaders();
+    } else {
+      return nullptr;
+    }
+  } catch (const std::exception& e) {
+    CTL_WRN(e.what());
+    return nullptr;
+  }
+}
 }  // namespace
 
 cpp::result DownloadService::VerifyDownloadTask(
@@ -98,6 +115,9 @@ cpp::result DownloadService::GetFileSize(
   curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
   curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
   curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
+  if (auto headers = CreateHeaders(url); headers) {
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+  }
 
   CURLcode res = curl_easy_perform(curl);
   if (res != CURLE_OK) {
@@ -176,6 +196,9 @@ cpp::result DownloadService::Download(
   }
 
   curl_easy_setopt(curl, CURLOPT_URL, download_item.downloadUrl.c_str());
+  if (auto headers = CreateHeaders(download_item.downloadUrl); headers) {
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+  }
   curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &WriteCallback);
   curl_easy_setopt(curl, CURLOPT_WRITEDATA, file);
   curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
@@ -262,6 +285,9 @@ void DownloadService::ProcessTask(DownloadTask& task) {
     return;
   }
   downloading_data_->item_id = item.id;
+  if (auto headers = CreateHeaders(item.downloadUrl); headers) {
+    curl_easy_setopt(handle, CURLOPT_HTTPHEADER, headers);
+  }
   curl_easy_setopt(handle, CURLOPT_URL, item.downloadUrl.c_str());
   curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, WriteCallback);
   curl_easy_setopt(handle, CURLOPT_WRITEDATA, file);
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index 62cc92e5b..3db54d997 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -70,7 +70,8 @@ cpp::result GetDownloadTask(
       .pathParams = {"api", "models", "cortexso", modelId, "tree", branch}};
 
   httplib::Client cli(url.GetProtocolAndHost());
-  auto res = cli.Get(url.GetPathAndQuery());
+  auto res =
+      cli.Get(url.GetPathAndQuery(), huggingface_utils::CreateHttpHfHeaders());
   if (res->status != httplib::StatusCode::OK_200) {
     return cpp::fail("Model " + modelId + " not found");
   }
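Aside, not part of the patch: every libcurl call site above follows the same pattern. `CreateHeaders()` builds a `curl_slist` only when the URL's host is huggingface.co, and the list is attached with `CURLOPT_HTTPHEADER` before the transfer runs. A minimal self-contained sketch of that pattern follows; the token and URL are placeholder values, since the real code reads `huggingFaceToken` from `~/.cortexrc` via `file_manager_utils::GetCortexConfig()`.

```cpp
// Sketch only: the token value and URL below are placeholders.
#include <curl/curl.h>

#include <string>

int main() {
  curl_global_init(CURL_GLOBAL_DEFAULT);
  CURL* curl = curl_easy_init();
  if (!curl)
    return 1;

  // Equivalent of CreateCurlHfHeaders(): a linked list of header strings.
  std::string token = "hf_xxx";  // placeholder; really read from ~/.cortexrc
  curl_slist* headers = nullptr;
  std::string auth = "Authorization: Bearer " + token;
  headers = curl_slist_append(headers, auth.c_str());
  headers = curl_slist_append(headers, "Content-Type: application/json");

  curl_easy_setopt(curl, CURLOPT_URL,
                   "https://huggingface.co/api/models/cortexso/tinyllama");
  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);  // attach the list
  CURLcode res = curl_easy_perform(curl);

  // The list must outlive the transfer, then be freed explicitly.
  curl_slist_free_all(headers);
  curl_easy_cleanup(curl);
  curl_global_cleanup();
  return res == CURLE_OK ? 0 : 1;
}
```

One review note: libcurl expects each `curl_slist` to be released with `curl_slist_free_all()` after the transfer, and no matching free is visible in the hunks shown here, so the lifetime of these lists may be worth double-checking.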
diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h
index e0e893f42..f61640db3 100644
--- a/engine/utils/config_yaml_utils.h
+++ b/engine/utils/config_yaml_utils.h
@@ -18,6 +18,7 @@ struct CortexConfig {
   std::string apiServerPort;
   uint64_t checkedForUpdateAt;
   std::string latestRelease;
+  std::string huggingFaceToken;
 };
 
 const std::string kCortexFolderName = "cortexcpp";
@@ -47,6 +48,7 @@ inline void DumpYamlConfig(const CortexConfig& config,
   node["apiServerPort"] = config.apiServerPort;
   node["checkedForUpdateAt"] = config.checkedForUpdateAt;
   node["latestRelease"] = config.latestRelease;
+  node["huggingFaceToken"] = config.huggingFaceToken;
 
   out_file << node;
   out_file.close();
@@ -70,7 +72,8 @@ inline CortexConfig FromYaml(const std::string& path,
       !node["maxLogLines"] || !node["apiServerHost"] ||
       !node["apiServerPort"] || !node["checkedForUpdateAt"] ||
       !node["latestRelease"] || !node["logLlamaCppPath"] ||
-      !node["logOnnxPath"] || !node["logTensorrtLLMPath"]);
+      !node["logOnnxPath"] || !node["logTensorrtLLMPath"] ||
+      !node["huggingFaceToken"]);
 
   CortexConfig config = {
       .logFolderPath = node["logFolderPath"]
@@ -102,6 +105,7 @@ inline CortexConfig FromYaml(const std::string& path,
       .latestRelease = node["latestRelease"]
                            ? node["latestRelease"].as<std::string>()
                            : default_cfg.latestRelease,
+      .huggingFaceToken = node["huggingFaceToken"] ? node["huggingFaceToken"].as<std::string>() : "",
   };
   if (should_update_config) {
     DumpYamlConfig(config, path);
diff --git a/engine/utils/curl_utils.h b/engine/utils/curl_utils.h
index 2640bdc9b..b52030726 100644
--- a/engine/utils/curl_utils.h
+++ b/engine/utils/curl_utils.h
@@ -14,7 +14,8 @@ size_t WriteCallback(void* contents, size_t size, size_t nmemb,
 }
 }  // namespace
 
-inline cpp::result<std::string, std::string> SimpleGet(const std::string& url) {
+inline cpp::result<std::string, std::string> SimpleGet(
+    const std::string& url, curl_slist* headers = nullptr) {
   CURL* curl;
   CURLcode res;
   std::string readBuffer;
@@ -27,6 +28,9 @@
     return cpp::fail("Failed to init CURL");
   }
   curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
+  if(headers) {
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+  }
 
   // Set write function callback and data buffer
   curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
@@ -45,8 +49,8 @@
 }
 
 inline cpp::result<YAML::Node, std::string> ReadRemoteYaml(
-    const std::string& url) {
-  auto result = SimpleGet(url);
+    const std::string& url, curl_slist* headers = nullptr) {
+  auto result = SimpleGet(url, headers);
   if (result.has_error()) {
     return cpp::fail(result.error());
   }
@@ -60,8 +64,8 @@
 }
 
 inline cpp::result<Json::Value, std::string> SimpleGetJson(
-    const std::string& url) {
-  auto result = SimpleGet(url);
+    const std::string& url, curl_slist* headers = nullptr) {
+  auto result = SimpleGet(url, headers);
   if (result.has_error()) {
     return cpp::fail(result.error());
   }
diff --git a/engine/utils/huggingface_utils.h b/engine/utils/huggingface_utils.h
index 97e948866..ab85948e7 100644
--- a/engine/utils/huggingface_utils.h
+++ b/engine/utils/huggingface_utils.h
@@ -3,7 +3,9 @@
 #include
 #include
 #include
+#include "httplib.h"
 #include "utils/curl_utils.h"
+#include "utils/file_manager_utils.h"
 #include "utils/result.hpp"
 #include "utils/url_parser.h"
 
@@ -46,6 +48,33 @@ struct HuggingFaceModelRepoInfo {
   std::string createdAt;
 };
 
+inline std::optional<std::string> GetHuggingFaceToken() {
+  auto const& token = file_manager_utils::GetCortexConfig().huggingFaceToken;
+  if (token.empty())
+    return std::nullopt;
+  return token;
+}
+
+inline curl_slist* CreateCurlHfHeaders() {
+  struct curl_slist* headers = nullptr;
+  auto hf_token = GetHuggingFaceToken();
+  if (hf_token) {
+    std::string auth_header = "Authorization: Bearer " + hf_token.value();
+    headers = curl_slist_append(headers, auth_header.c_str());
+    headers = curl_slist_append(headers, "Content-Type: application/json");
+  }
+  return headers;
+}
+
+inline httplib::Headers CreateHttpHfHeaders() {
+  httplib::Headers headers;
+  auto token = GetHuggingFaceToken();
+  if (token) {
+    headers.emplace("Authorization", "Bearer " + token.value());
+  }
+  return headers;
+}
+
 inline cpp::result, std::string>
 GetModelRepositoryBranches(const std::string& author,
@@ -58,7 +87,8 @@ GetModelRepositoryBranches(const std::string& author,
       .host = kHuggingfaceHost,
       .pathParams = {"api", "models", author, modelName, "refs"}};
 
-  auto result = curl_utils::SimpleGetJson(url_obj.ToFullPath());
+  auto result =
+      curl_utils::SimpleGetJson(url_obj.ToFullPath(), CreateCurlHfHeaders());
   if (result.has_error()) {
     return cpp::fail("Failed to get model repository branches: " + author +
                      "/" + modelName);
@@ -90,7 +120,8 @@ GetHuggingFaceModelRepoInfo(const std::string& author,
       .host = kHuggingfaceHost,
       .pathParams = {"api", "models", author, modelName}};
 
-  auto result = curl_utils::SimpleGetJson(url_obj.ToFullPath());
+  auto result =
+      curl_utils::SimpleGetJson(url_obj.ToFullPath(), CreateCurlHfHeaders());
   if (result.has_error()) {
     return cpp::fail("Failed to get model repository info: " + author + "/" +
                      modelName);
@@ -162,8 +193,8 @@ inline std::string GetDownloadableUrl(const std::string& author,
 
 inline std::optional<std::string> GetDefaultBranch(
     const std::string& model_name) {
-  auto default_model_branch =
-      curl_utils::ReadRemoteYaml(GetMetadataUrl(model_name));
+  auto default_model_branch = curl_utils::ReadRemoteYaml(
+      GetMetadataUrl(model_name), CreateCurlHfHeaders());
   if (default_model_branch.has_error()) {
     return std::nullopt;
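Aside, not part of the patch: the cpp-httplib call sites (e.g. `GetDownloadTask` in model_service.cc) take the second shape, `CreateHttpHfHeaders()`, which returns an `httplib::Headers` multimap that httplib serializes into the request. A rough standalone sketch under the same assumptions follows; the token is a placeholder, and an https client requires cpp-httplib built with `CPPHTTPLIB_OPENSSL_SUPPORT`.

```cpp
// Sketch only: mirrors CreateHttpHfHeaders() plus the model_service.cc call.
#include <httplib.h>

#include <string>

int main() {
  std::string token = "hf_xxx";  // placeholder; really read from ~/.cortexrc
  httplib::Headers headers;
  if (!token.empty()) {  // an empty token falls back to an anonymous request
    headers.emplace("Authorization", "Bearer " + token);
  }
  httplib::Client cli("https://huggingface.co");
  // Model and branch below are example values for the cortexso tree lookup.
  auto res = cli.Get("/api/models/cortexso/tinyllama/tree/main", headers);
  return (res && res->status == 200) ? 0 : 1;
}
```

This mirrors why `GetHuggingFaceToken()` gates on `token.empty()`: with no `huggingFaceToken` set in `~/.cortexrc`, both header builders produce an empty header set and requests stay anonymous, so pulls of public models keep working without a token.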