From 77c56ff55e3a71047a487950d81f78c3b40e409a Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Mon, 10 Feb 2025 16:04:52 +0700
Subject: [PATCH 1/2] fix: add default cpu_threads

---
 engine/services/model_service.cc             | 25 +++++++++++++++++++
 engine/services/model_service.h              |  2 ++
 engine/utils/hardware/gguf/gguf_file.h       | 12 ++++++---
 .../utils/hardware/gguf/gguf_file_estimate.h |  2 +-
 4 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index 6dc1642fb..496646758 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -939,6 +939,11 @@ cpp::result ModelService::StartModel(
 
   json_helper::MergeJson(json_data, params_override);
 
+  // Set default cpu_threads if it is not configured
+  if (!json_data.isMember("cpu_threads")) {
+    json_data["cpu_threads"] = GetCpuThreads();
+  }
+
   // Set the latest ctx_len
   if (ctx_len) {
     json_data["ctx_len"] =
@@ -1321,6 +1326,26 @@ ModelService::MayFallbackToCpu(const std::string& model_path, int ngl,
   return warning;
 }
 
+int ModelService::GetCpuThreads() const {
+  auto hw_thread_num = std::thread::hardware_concurrency();
+  auto default_engine = engine_svc_->GetDefaultEngineVariant(kLlamaEngine);
+  auto is_gpu_mode = false;
+  if (default_engine.has_error()) {
+    CTL_INF("Could not get default engine");
+  } else {
+    auto& de = default_engine.value();
+    is_gpu_mode = (de.variant.find("cuda") != std::string::npos) ||
+                  (de.variant.find("vulkan") != std::string::npos) ||
+                  (de.variant.find("mac") != std::string::npos);
+  }
+
+  if (is_gpu_mode) {
+    return std::max(hw_thread_num / 4, 1u);
+  } else {
+    return std::max(hw_thread_num / 2, 1u);
+  }
+}
+
 cpp::result, std::string>
 ModelService::GetModelMetadata(const std::string& model_id) const {
   if (model_id.empty()) {
diff --git a/engine/services/model_service.h b/engine/services/model_service.h
index 17f2c0ddb..dcf99430f 100644
--- a/engine/services/model_service.h
+++ b/engine/services/model_service.h
@@ -112,6 +112,8 @@ class ModelService {
       const std::string& model_path, int ngl, int ctx_len, int n_batch = 2048,
       int n_ubatch = 2048, const std::string& kv_cache_type = "f16");
 
+  int GetCpuThreads() const;
+
   std::shared_ptr db_service_;
   std::shared_ptr hw_service_;
   std::shared_ptr download_service_;
diff --git a/engine/utils/hardware/gguf/gguf_file.h b/engine/utils/hardware/gguf/gguf_file.h
index 361668242..640c1b49f 100644
--- a/engine/utils/hardware/gguf/gguf_file.h
+++ b/engine/utils/hardware/gguf/gguf_file.h
@@ -7,11 +7,11 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
-#include
 
 #ifdef _WIN32
 #include
 #include
 #else
 #include
 #include
 #include
 #endif
 
 #include "ggml.h"
-#include "utils/string_utils.h"
 #include "utils/logging_utils.h"
+#include "utils/string_utils.h"
 
 // #define GGUF_LOG(msg) \
 //   do { \
@@ -246,11 +246,15 @@ struct GGUFHelper {
     file_size = std::filesystem::file_size(file_path);
 
     int fd = open(file_path.c_str(), O_RDONLY);
+    if (fd == -1) {
+      CTL_INF("Failed to open file: " << file_path << ", error: " << errno);
+      return false;
+    }
     // Memory-map the file
     data = static_cast(
         mmap(nullptr, file_size, PROT_READ, MAP_PRIVATE, fd, 0));
     if (data == MAP_FAILED) {
-      perror("Error mapping file");
+      CTL_INF("Error mapping file");
       close(fd);
       return false;
     }
@@ -482,7 +486,7 @@ struct GGUFFile {
 inline std::optional ParseGgufFile(const std::string& path) {
   GGUFFile gf;
   GGUFHelper h;
-  if(!h.OpenAndMMap(path)) {
+  if (!h.OpenAndMMap(path)) {
     return std::nullopt;
   }
diff --git a/engine/utils/hardware/gguf/gguf_file_estimate.h b/engine/utils/hardware/gguf/gguf_file_estimate.h
index 12a7e72e1..402a70958 100644
--- a/engine/utils/hardware/gguf/gguf_file_estimate.h
+++ b/engine/utils/hardware/gguf/gguf_file_estimate.h
@@ -64,7 +64,6 @@ inline float GetQuantBit(const std::string& kv_cache_t) {
 
 inline std::optional EstimateLLaMACppRun(
     const std::string& file_path, const RunConfig& rc) {
-  Estimation res;
   // token_embeddings_size = n_vocab * embedding_length * 2 * quant_bit/16 bytes
   //RAM = token_embeddings_size + ((total_ngl-ngl) >=1 ? Output_layer_size + (total_ngl - ngl - 1 ) / (total_ngl-1) * (total_file_size - token_embeddings_size - Output_layer_size) : 0 ) (bytes)
 
@@ -72,6 +71,7 @@ inline std::optional EstimateLLaMACppRun(
   auto gf = ParseGgufFile(file_path);
   if (!gf)
     return std::nullopt;
+  Estimation res;
   int32_t embedding_length = 0;
   int64_t n_vocab = 0;
   int32_t num_block = 0;

From 85a07760ecad7d26a6b9da31c21fcd3b74ee6b4c Mon Sep 17 00:00:00 2001
From: sangjanai
Date: Fri, 14 Feb 2025 08:56:40 +0700
Subject: [PATCH 2/2] fix: use half of cpu threads

---
 engine/services/model_service.cc | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index 496646758..8f968437c 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -1327,23 +1327,7 @@ ModelService::MayFallbackToCpu(const std::string& model_path, int ngl,
 }
 
 int ModelService::GetCpuThreads() const {
-  auto hw_thread_num = std::thread::hardware_concurrency();
-  auto default_engine = engine_svc_->GetDefaultEngineVariant(kLlamaEngine);
-  auto is_gpu_mode = false;
-  if (default_engine.has_error()) {
-    CTL_INF("Could not get default engine");
-  } else {
-    auto& de = default_engine.value();
-    is_gpu_mode = (de.variant.find("cuda") != std::string::npos) ||
-                  (de.variant.find("vulkan") != std::string::npos) ||
-                  (de.variant.find("mac") != std::string::npos);
-  }
-
-  if (is_gpu_mode) {
-    return std::max(hw_thread_num / 4, 1u);
-  } else {
-    return std::max(hw_thread_num / 2, 1u);
-  }
+  return std::max(std::thread::hardware_concurrency() / 2, 1u);
 }
 
 cpp::result, std::string>
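
---
Note (not part of the patches above): the heuristic that PATCH 2/2 settles on
is small enough to check in isolation. The standalone sketch below only
mirrors the final GetCpuThreads() logic from the patch -- half of the
reported hardware threads, clamped to at least 1 -- outside the ModelService
class; the free function name DefaultCpuThreads is made up for the example.

#include <algorithm>  // std::max
#include <iostream>
#include <thread>     // std::thread::hardware_concurrency

// Mirrors ModelService::GetCpuThreads() after PATCH 2/2.
// hardware_concurrency() may legally return 0 when the value cannot be
// determined; 0 / 2 == 0, so the std::max clamp guarantees at least 1.
unsigned int DefaultCpuThreads() {
  return std::max(std::thread::hardware_concurrency() / 2, 1u);
}

int main() {
  std::cout << "hardware_concurrency: "
            << std::thread::hardware_concurrency() << '\n';
  std::cout << "default cpu_threads:  " << DefaultCpuThreads() << '\n';
  return 0;
}

On an 8-thread machine this prints a default of 4; on a machine where
hardware_concurrency() reports 0, it prints 1, matching the clamp in the
patch.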