From 3ff301a234945f94fad37c022d170fb20e015879 Mon Sep 17 00:00:00 2001
From: Roushan Kumar Singh <158602016+github-roushan@users.noreply.github.com>
Date: Tue, 25 Feb 2025 14:15:17 +0530
Subject: [PATCH 1/7] chore: fix typos, and update 'variant' description (#2023)

Corrected table name (to schema_version), fixed typo, and added description
for the 'variant' column.
---
 docs/docs/architecture/cortex-db.mdx | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/docs/architecture/cortex-db.mdx b/docs/docs/architecture/cortex-db.mdx
index 7434bc62e..6182c74f4 100644
--- a/docs/docs/architecture/cortex-db.mdx
+++ b/docs/docs/architecture/cortex-db.mdx
@@ -16,12 +16,12 @@ This document outlines Cortex database architecture which is designed to store a
 files and more.

 ## Table Structure
-### schema Table
-The `schema` table is designed to hold schema version for cortex database. Below is the structure of the table:
+### schema_version Table
+The `schema_version` table is designed to hold the schema version for the Cortex database. Below is the structure of the table:

 | Column Name | Data Type | Description |
 |--------------------|-----------|---------------------------------------------------------|
-| schema_version | INTEGER | A unique schema version for database. |
+| version | INTEGER | A unique schema version for the database. |

 ### models Table
@@ -64,10 +64,10 @@ Below is the structure of the table:
 | api_key | TEXT | |
 | url | TEXT | |
 | version | TEXT | The current version of the engine. |
-| variant | TEXT | |
+| variant | TEXT | A string specifying the configuration or build variant of the engine. |
 | status | TEXT | Current status of the engine (e.g., "downloaded", "downloadable"). |
 | metadata | TEXT | Additional metadata or information about the engine. |
-| date_ceated | TEXT | Date when the engine was downloaded. |
+| date_created | TEXT | Date when the engine was downloaded. |
 | date_updated | TEXT | Date when the engine was last updated. |

 ### files Table
@@ -79,5 +79,5 @@ The `files` table is designed to hold metadata about objects downloaded via Cortex
 | object | TEXT | The type of hardware. |
 | purpose | TEXT | Purpose of file |
 | filename | TEXT | The name of the file. |
-| created_at | INTEGER | Date when file was created |
-| bytes | INTEGER | |
+| created_at | INTEGER | Date when file was created. |
+| bytes | INTEGER | Size of the file on disk in bytes. |
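As a quick illustration of the documented layout, reading the stored schema version through the sqlite3 C API could look like the sketch below. This is illustrative only: it assumes the database is a SQLite file named cortex.db in the working directory, which the page above does not state.

    #include <cstdio>
    #include <sqlite3.h>

    int main() {
      sqlite3* db = nullptr;
      // "cortex.db" is an assumed path for this sketch.
      if (sqlite3_open("cortex.db", &db) != SQLITE_OK) return 1;
      sqlite3_stmt* stmt = nullptr;
      // Table and column names follow the schema_version table documented above.
      if (sqlite3_prepare_v2(db, "SELECT version FROM schema_version;", -1, &stmt,
                             nullptr) == SQLITE_OK &&
          sqlite3_step(stmt) == SQLITE_ROW) {
        std::printf("schema version: %d\n", sqlite3_column_int(stmt, 0));
      }
      sqlite3_finalize(stmt);
      sqlite3_close(db);
      return 0;
    }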
From ca2327a021f41c1c90ae8ddd1e3ab96492397065 Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Wed, 26 Feb 2025 13:49:34 +0700
Subject: [PATCH 2/7] fix: correct RAM usage (#2019)

Co-authored-by: sangjanai
---
 engine/utils/hardware/ram_info.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/engine/utils/hardware/ram_info.h b/engine/utils/hardware/ram_info.h
index 1ee4a55f7..14a48d798 100644
--- a/engine/utils/hardware/ram_info.h
+++ b/engine/utils/hardware/ram_info.h
@@ -17,12 +17,12 @@ inline Memory GetMemoryInfo() {
   hwinfo::Memory m;
 #if defined(__APPLE__) && defined(__MACH__)
   int64_t total_memory = 0;
-  int64_t used_memory = 0;
+  int64_t avail_memory = 0;
   size_t length = sizeof(total_memory);
   sysctlbyname("hw.memsize", &total_memory, &length, NULL, 0);

-  // Get used memory (this is a rough estimate)
+  // Get avail memory (this is a rough estimate)
   vm_size_t page_size;
   mach_msg_type_number_t count = HOST_VM_INFO_COUNT;
@@ -31,12 +31,10 @@ inline Memory GetMemoryInfo() {
   if (host_statistics(mach_host_self(), HOST_VM_INFO, (host_info_t)&vm_stat,
                       &count) == KERN_SUCCESS) {
-    used_memory =
-        (vm_stat.active_count + vm_stat.inactive_count + vm_stat.wire_count) *
-        page_size;
+    avail_memory = (vm_stat.free_count + vm_stat.inactive_count) * page_size;
   }
   return Memory{.total_MiB = ByteToMiB(total_memory),
-                .available_MiB = ByteToMiB(total_memory - used_memory)};
+                .available_MiB = ByteToMiB(avail_memory)};
 #elif defined(__linux__) || defined(_WIN32)
   return Memory{.total_MiB = ByteToMiB(m.total_Bytes()),
                 .available_MiB = ByteToMiB(m.available_Bytes())};

From 510ae28cc8db809d842f06e5838917c6fcee4932 Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Thu, 27 Feb 2025 08:58:58 +0700
Subject: [PATCH 3/7] fix: default GPUs setting (#2029)

Co-authored-by: sangjanai
---
 engine/common/hardware_common.h | 11 ++++++
 engine/services/hardware_service.cc | 36 +++++++++++++------
 engine/utils/hardware/gpu/vulkan/vulkan_gpu.h | 36 +++++++++++--------
 engine/utils/hardware/gpu_info.h | 5 +--
 4 files changed, 61 insertions(+), 27 deletions(-)

diff --git a/engine/common/hardware_common.h b/engine/common/hardware_common.h
index b3822b116..4dc2e2c35 100644
--- a/engine/common/hardware_common.h
+++ b/engine/common/hardware_common.h
@@ -69,6 +69,16 @@ struct NvidiaAddInfo {
 };
 struct AmdAddInfo {};
 using GPUAddInfo = std::variant<NvidiaAddInfo, AmdAddInfo>;
+
+enum class GpuType {
+  kGpuTypeOther = 0,
+  kGpuTypeIntegrated = 1,
+  kGpuTypeDiscrete = 2,
+  kGpuTypeVirtual = 3,
+  kGpuTypeCpu = 4,
+  kGpuTypeMaxEnum = 0x7FFFFFFF
+};
+
 struct GPU {
   std::string id;
   uint32_t device_id;
@@ -80,6 +90,7 @@ struct GPU {
   std::string uuid;
   bool is_activated = true;
   std::string vendor;
+  GpuType gpu_type;
 };

 inline Json::Value ToJson(const std::vector<GPU>& gpus) {
diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc
index 56ecadd6d..88a5df6b0 100644
--- a/engine/services/hardware_service.cc
+++ b/engine/services/hardware_service.cc
@@ -322,23 +322,39 @@ void HardwareService::UpdateHardwareInfos() {
     }
   }
   CTL_INF("Activated GPUs before: " << debug_b);
+  auto has_nvidia = [&gpus] {
+    for (auto const& g : gpus) {
+      if (g.vendor == cortex::hw::kNvidiaStr) {
+        return true;
+      }
+    }
+    return false;
+  }();
+
   for (auto const& gpu : gpus) {
-    // ignore error
-    // Note: only support NVIDIA for now, so hardware_id = software_id
     if (db_service_->HasHardwareEntry(gpu.uuid)) {
       auto res = db_service_->UpdateHardwareEntry(gpu.uuid, std::stoi(gpu.id),
-                                                 std::stoi(gpu.id));
+                                                  std::stoi(gpu.id));
       if (res.has_error()) {
CTL_WRN(res.error()); } } else { - auto res = - db_service_->AddHardwareEntry(HwEntry{.uuid = gpu.uuid, - .type = "gpu", - .hardware_id = std::stoi(gpu.id), - .software_id = std::stoi(gpu.id), - .activated = true, - .priority = INT_MAX}); + // iGPU should be deactivated by default + // Only activate Nvidia GPUs if both AMD and Nvidia GPUs exists + auto activated = [&gpu, &gpus, has_nvidia] { + if (gpu.gpu_type != cortex::hw::GpuType::kGpuTypeDiscrete) + return false; + if (has_nvidia && gpu.vendor != cortex::hw::kNvidiaStr) + return false; + return true; + }; + auto res = db_service_->AddHardwareEntry( + HwEntry{.uuid = gpu.uuid, + .type = "gpu", + .hardware_id = std::stoi(gpu.id), + .software_id = std::stoi(gpu.id), + .activated = activated(), + .priority = INT_MAX}); if (res.has_error()) { CTL_WRN(res.error()); } diff --git a/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h b/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h index 4969794d1..27899ca77 100644 --- a/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h +++ b/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h @@ -24,21 +24,26 @@ #endif namespace cortex::hw { -constexpr const uint32_t NVIDIA_VENDOR = 0x10DE; -constexpr const uint32_t AMD_VENDOR = 0x1002; -constexpr const uint32_t INTEL_VENDOR = 0x8086; -constexpr const uint32_t ARM_VENDOR = 0x13B5; +constexpr const uint32_t kNvidiaVendor = 0x10DE; +constexpr const uint32_t kAmdVendor = 0x1002; +constexpr const uint32_t kIntelVendor = 0x8086; +constexpr const uint32_t kArmVendor = 0x13B5; + +constexpr const auto kAmdStr = "AMD"; +constexpr const auto kNvidiaStr = "NVIDIA"; +constexpr const auto kIntelStr = "INTEL"; +constexpr const auto kArmStr = "ARM"; inline std::string GetVendorStr(uint32_t vendor_id) { switch (vendor_id) { - case AMD_VENDOR: - return "AMD"; - case NVIDIA_VENDOR: - return "NVIDIA"; - case INTEL_VENDOR: - return "INTEL"; - case ARM_VENDOR: - return "ARM"; + case kAmdVendor: + return kAmdStr; + case kNvidiaVendor: + return kNvidiaStr; + case kIntelVendor: + return kIntelStr; + case kArmVendor: + return kArmStr; default: return std::to_string(vendor_id); } @@ -446,8 +451,8 @@ class VulkanGpu { #endif int free_vram_MiB = total_vram_MiB > used_vram_MiB ? 
total_vram_MiB - used_vram_MiB : 0; - if (device_properties.vendorID == NVIDIA_VENDOR || - device_properties.vendorID == AMD_VENDOR) { + if (device_properties.vendorID == kNvidiaVendor || + device_properties.vendorID == kAmdVendor) { gpus.emplace_back(cortex::hw::GPU{ .id = std::to_string(id), .device_id = device_properties.deviceID, @@ -457,7 +462,8 @@ class VulkanGpu { .free_vram = free_vram_MiB, .total_vram = total_vram_MiB, .uuid = uuid_to_string(device_id_properties.deviceUUID), - .vendor = GetVendorStr(device_properties.vendorID)}); + .vendor = GetVendorStr(device_properties.vendorID), + .gpu_type = static_cast(device_properties.deviceType)}); } id++; } diff --git a/engine/utils/hardware/gpu_info.h b/engine/utils/hardware/gpu_info.h index 14096d4bb..1a2a5319c 100644 --- a/engine/utils/hardware/gpu_info.h +++ b/engine/utils/hardware/gpu_info.h @@ -25,7 +25,7 @@ inline std::vector GetGPUInfo() { .compute_cap = nvidia_gpus[i].compute_cap.value_or("unknown")}; vulkan_gpus[j].free_vram = std::stoll(nvidia_gpus[i].vram_free); vulkan_gpus[j].total_vram = std::stoll(nvidia_gpus[i].vram_total); - vulkan_gpus[j].vendor = nvidia_gpus[i].vendor; + vulkan_gpus[j].vendor = nvidia_gpus[i].vendor; } } } @@ -55,7 +55,8 @@ inline std::vector GetGPUInfo() { .free_vram = std::stoi(n.vram_free), .total_vram = std::stoi(n.vram_total), .uuid = n.uuid, - .vendor = n.vendor}); + .vendor = n.vendor, + .gpu_type = GpuType::kGpuTypeDiscrete}); } return res; } From 9f1a50f8d0ba8c8a95b6ecbe9b990a0753f742ab Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 27 Feb 2025 09:22:25 +0700 Subject: [PATCH 4/7] fix: add default cpu_threads (#1948) * fix: add default cpu_threads * fix: use half of cpu threads --------- Co-authored-by: vansangpfiev --- engine/services/model_service.cc | 9 +++++++++ engine/services/model_service.h | 2 ++ engine/utils/hardware/gguf/gguf_file.h | 12 ++++++++---- engine/utils/hardware/gguf/gguf_file_estimate.h | 2 +- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 979cf9342..aeef54605 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -945,6 +945,11 @@ cpp::result ModelService::StartModel( json_helper::MergeJson(json_data, params_override); + // Set default cpu_threads if it is not configured + if (!json_data.isMember("cpu_threads")) { + json_data["cpu_threads"] = GetCpuThreads(); + } + // Set the latest ctx_len if (ctx_len) { json_data["ctx_len"] = @@ -1329,6 +1334,10 @@ ModelService::MayFallbackToCpu(const std::string& model_path, int ngl, return warning; } +int ModelService::GetCpuThreads() const { + return std::max(std::thread::hardware_concurrency() / 2, 1u); +} + cpp::result, std::string> ModelService::GetModelMetadata(const std::string& model_id) const { if (model_id.empty()) { diff --git a/engine/services/model_service.h b/engine/services/model_service.h index 17f2c0ddb..dcf99430f 100644 --- a/engine/services/model_service.h +++ b/engine/services/model_service.h @@ -112,6 +112,8 @@ class ModelService { const std::string& model_path, int ngl, int ctx_len, int n_batch = 2048, int n_ubatch = 2048, const std::string& kv_cache_type = "f16"); + int GetCpuThreads() const; + std::shared_ptr db_service_; std::shared_ptr hw_service_; std::shared_ptr download_service_; diff --git a/engine/utils/hardware/gguf/gguf_file.h b/engine/utils/hardware/gguf/gguf_file.h index 361668242..640c1b49f 100644 --- a/engine/utils/hardware/gguf/gguf_file.h +++ 
b/engine/utils/hardware/gguf/gguf_file.h @@ -7,11 +7,11 @@ #include #include #include +#include #include #include #include #include -#include #ifdef _WIN32 #include @@ -23,8 +23,8 @@ #endif #include "ggml.h" -#include "utils/string_utils.h" #include "utils/logging_utils.h" +#include "utils/string_utils.h" // #define GGUF_LOG(msg) \ // do { \ @@ -246,11 +246,15 @@ struct GGUFHelper { file_size = std::filesystem::file_size(file_path); int fd = open(file_path.c_str(), O_RDONLY); + if (fd == -1) { + CTL_INF("Failed to open file: " << file_path << ", error: " << errno); + return false; + } // Memory-map the file data = static_cast( mmap(nullptr, file_size, PROT_READ, MAP_PRIVATE, fd, 0)); if (data == MAP_FAILED) { - perror("Error mapping file"); + CTL_INF("Error mapping file"); close(fd); return false; } @@ -482,7 +486,7 @@ struct GGUFFile { inline std::optional ParseGgufFile(const std::string& path) { GGUFFile gf; GGUFHelper h; - if(!h.OpenAndMMap(path)) { + if (!h.OpenAndMMap(path)) { return std::nullopt; } diff --git a/engine/utils/hardware/gguf/gguf_file_estimate.h b/engine/utils/hardware/gguf/gguf_file_estimate.h index 12a7e72e1..402a70958 100644 --- a/engine/utils/hardware/gguf/gguf_file_estimate.h +++ b/engine/utils/hardware/gguf/gguf_file_estimate.h @@ -64,7 +64,6 @@ inline float GetQuantBit(const std::string& kv_cache_t) { inline std::optional EstimateLLaMACppRun( const std::string& file_path, const RunConfig& rc) { - Estimation res; // token_embeddings_size = n_vocab * embedding_length * 2 * quant_bit/16 bytes //RAM = token_embeddings_size + ((total_ngl-ngl) >=1 ? Output_layer_size + (total_ngl - ngl - 1 ) / (total_ngl-1) * (total_file_size - token_embeddings_size - Output_layer_size) : 0 ) (bytes) @@ -72,6 +71,7 @@ inline std::optional EstimateLLaMACppRun( auto gf = ParseGgufFile(file_path); if (!gf) return std::nullopt; + Estimation res; int32_t embedding_length = 0; int64_t n_vocab = 0; int32_t num_block = 0; From b9e9e154fdf710e73c1a0d6e95363f8c1c080409 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 27 Feb 2025 10:16:50 +0700 Subject: [PATCH 5/7] fix: add model author for model source (#2038) Co-authored-by: sangjanai --- engine/services/model_source_service.cc | 93 +++++++++++++++++-------- engine/services/model_source_service.h | 12 ++-- engine/utils/huggingface_utils.h | 20 ++++++ engine/utils/url_parser.h | 10 ++- 4 files changed, 98 insertions(+), 37 deletions(-) diff --git a/engine/services/model_source_service.cc b/engine/services/model_source_service.cc index 59275e8db..3314fd53e 100644 --- a/engine/services/model_source_service.cc +++ b/engine/services/model_source_service.cc @@ -14,6 +14,13 @@ namespace hu = huggingface_utils; namespace { +constexpr const int kModeSourceCacheSecs = 600; + +std::string GenSourceId(const std::string& author_hub, + const std::string& model_name) { + return author_hub + "/" + model_name; +} + std::vector ParseJsonString(const std::string& json_str) { std::vector models; @@ -79,19 +86,34 @@ cpp::result ModelSourceService::AddModelSource( } if (auto is_org = r.pathParams.size() == 1; is_org) { - auto& author = r.pathParams[0]; - if (author == "cortexso") { - return AddCortexsoOrg(model_source); - } else { - return AddHfOrg(model_source, author); - } + return cpp::fail("Only support repository model source, url: " + + model_source); + // TODO(sang) + // auto& hub_author = r.pathParams[0]; + // if (hub_author == "cortexso") { + // return AddCortexsoOrg(model_source); + // } else { + // return AddHfOrg(model_source, hub_author); + // } } 
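  // Note: the else branch below is the repository path. AddHfRepo and
  // AddCortexsoRepo stamp src_cache_ with the time of the last successful
  // sync, so a repeated AddModelSource call for the same author/model pair
  // within kModeSourceCacheSecs (600 s) returns early from the cache.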
else { // Repo - auto const& author = r.pathParams[0]; + auto const& hub_author = r.pathParams[0]; auto const& model_name = r.pathParams[1]; + // Return cache value + if (auto key = GenSourceId(hub_author, model_name); + src_cache_.find(key) != src_cache_.end()) { + auto now = std::chrono::system_clock::now(); + if (std::chrono::duration_cast(now - + src_cache_.at(key)) + .count() < kModeSourceCacheSecs) { + CTL_DBG("Return cache value for model source: " << model_source); + return true; + } + } + if (r.pathParams[0] == "cortexso") { - return AddCortexsoRepo(model_source, author, model_name); + return AddCortexsoRepo(model_source, hub_author, model_name); } else { - return AddHfRepo(model_source, author, model_name); + return AddHfRepo(model_source, hub_author, model_name); } } } @@ -190,9 +212,9 @@ cpp::result ModelSourceService::GetModelSource( } cpp::result, std::string> -ModelSourceService::GetRepositoryList(std::string_view author, +ModelSourceService::GetRepositoryList(std::string_view hub_author, std::string_view tag_filter) { - std::string as(author); + std::string as(hub_author); auto get_repo_list = [this, &as, &tag_filter] { std::vector repo_list; auto const& mis = cortexso_repos_.at(as); @@ -227,9 +249,9 @@ ModelSourceService::GetRepositoryList(std::string_view author, } cpp::result ModelSourceService::AddHfOrg( - const std::string& model_source, const std::string& author) { + const std::string& model_source, const std::string& hub_author) { auto res = curl_utils::SimpleGet("https://huggingface.co/api/models?author=" + - author); + hub_author); if (res.has_value()) { auto models = ParseJsonString(res.value()); // Add new models @@ -238,9 +260,10 @@ cpp::result ModelSourceService::AddHfOrg( auto author_model = string_utils::SplitBy(m.id, "/"); if (author_model.size() == 2) { - auto const& author = author_model[0]; + auto const& hub_author = author_model[0]; auto const& model_name = author_model[1]; - auto r = AddHfRepo(model_source + "/" + model_name, author, model_name); + auto r = + AddHfRepo(model_source + "/" + model_name, hub_author, model_name); if (r.has_error()) { CTL_WRN(r.error()); } @@ -253,14 +276,14 @@ cpp::result ModelSourceService::AddHfOrg( } cpp::result ModelSourceService::AddHfRepo( - const std::string& model_source, const std::string& author, + const std::string& model_source, const std::string& hub_author, const std::string& model_name) { // Get models from db auto model_list_before = db_service_->GetModels(model_source) .value_or(std::vector{}); std::unordered_set updated_model_list; - auto add_res = AddRepoSiblings(model_source, author, model_name); + auto add_res = AddRepoSiblings(model_source, hub_author, model_name); if (add_res.has_error()) { return cpp::fail(add_res.error()); } else { @@ -274,15 +297,17 @@ cpp::result ModelSourceService::AddHfRepo( } } } + src_cache_[GenSourceId(hub_author, model_name)] = + std::chrono::system_clock::now(); return true; } cpp::result, std::string> ModelSourceService::AddRepoSiblings(const std::string& model_source, - const std::string& author, + const std::string& hub_author, const std::string& model_name) { std::unordered_set res; - auto repo_info = hu::GetHuggingFaceModelRepoInfo(author, model_name); + auto repo_info = hu::GetHuggingFaceModelRepoInfo(hub_author, model_name); if (repo_info.has_error()) { return cpp::fail(repo_info.error()); } @@ -293,14 +318,14 @@ ModelSourceService::AddRepoSiblings(const std::string& model_source, "supported."); } - auto siblings_fs = hu::GetSiblingsFileSize(author, model_name); + 
auto siblings_fs = hu::GetSiblingsFileSize(hub_author, model_name); if (siblings_fs.has_error()) { - return cpp::fail("Could not get siblings file size: " + author + "/" + - model_name); + return cpp::fail("Could not get siblings file size: " + + GenSourceId(hub_author, model_name)); } - auto readme = hu::GetReadMe(author, model_name); + auto readme = hu::GetReadMe(hub_author, model_name); std::string desc; if (!readme.has_error()) { desc = readme.value(); @@ -326,10 +351,10 @@ ModelSourceService::AddRepoSiblings(const std::string& model_source, siblings_fs_v.file_sizes.at(sibling.rfilename).size_in_bytes; } std::string model_id = - author + ":" + model_name + ":" + sibling.rfilename; + hub_author + ":" + model_name + ":" + sibling.rfilename; cortex::db::ModelEntry e = { .model = model_id, - .author_repo_id = author, + .author_repo_id = hub_author, .branch_name = "main", .path_to_model_yaml = "", .model_alias = "", @@ -369,9 +394,9 @@ cpp::result ModelSourceService::AddCortexsoOrg( CTL_INF(m.id); auto author_model = string_utils::SplitBy(m.id, "/"); if (author_model.size() == 2) { - auto const& author = author_model[0]; + auto const& hub_author = author_model[0]; auto const& model_name = author_model[1]; - auto r = AddCortexsoRepo(model_source + "/" + model_name, author, + auto r = AddCortexsoRepo(model_source + "/" + model_name, hub_author, model_name); if (r.has_error()) { CTL_WRN(r.error()); @@ -386,7 +411,7 @@ cpp::result ModelSourceService::AddCortexsoOrg( } cpp::result ModelSourceService::AddCortexsoRepo( - const std::string& model_source, const std::string& author, + const std::string& model_source, const std::string& hub_author, const std::string& model_name) { auto begin = std::chrono::system_clock::now(); auto branches = @@ -395,17 +420,23 @@ cpp::result ModelSourceService::AddCortexsoRepo( return cpp::fail(branches.error()); } - auto repo_info = hu::GetHuggingFaceModelRepoInfo(author, model_name); + auto repo_info = hu::GetHuggingFaceModelRepoInfo(hub_author, model_name); if (repo_info.has_error()) { return cpp::fail(repo_info.error()); } - auto readme = hu::GetReadMe(author, model_name); + auto readme = hu::GetReadMe(hub_author, model_name); std::string desc; if (!readme.has_error()) { desc = readme.value(); } + auto author = hub_author; + if (auto model_author = hu::GetModelAuthorCortexsoHub(model_name); + model_author.has_value() && !model_author->empty()) { + author = *model_author; + } + // Get models from db auto model_list_before = db_service_->GetModels(model_source) .value_or(std::vector{}); @@ -442,6 +473,8 @@ cpp::result ModelSourceService::AddCortexsoRepo( "Duration ms: " << std::chrono::duration_cast( end - begin) .count()); + src_cache_[GenSourceId(hub_author, model_name)] = + std::chrono::system_clock::now(); return true; } diff --git a/engine/services/model_source_service.h b/engine/services/model_source_service.h index cffe93bb9..54acae380 100644 --- a/engine/services/model_source_service.h +++ b/engine/services/model_source_service.h @@ -65,25 +65,25 @@ class ModelSourceService { cpp::result GetModelSource(const std::string& src); cpp::result, std::string> GetRepositoryList( - std::string_view author, std::string_view tag_filter); + std::string_view hub_author, std::string_view tag_filter); private: cpp::result AddHfOrg(const std::string& model_source, - const std::string& author); + const std::string& hub_author); cpp::result AddHfRepo(const std::string& model_source, - const std::string& author, + const std::string& hub_author, const std::string& model_name); 
cpp::result, std::string> AddRepoSiblings( - const std::string& model_source, const std::string& author, + const std::string& model_source, const std::string& hub_author, const std::string& model_name); cpp::result AddCortexsoOrg( const std::string& model_source); cpp::result AddCortexsoRepo( - const std::string& model_source, const std::string& author, + const std::string& model_source, const std::string& hub_author, const std::string& model_name); cpp::result AddCortexsoRepoBranch( @@ -99,4 +99,6 @@ class ModelSourceService { std::atomic running_; std::unordered_map> cortexso_repos_; + using TimePoint = std::chrono::time_point; + std::unordered_map src_cache_; }; \ No newline at end of file diff --git a/engine/utils/huggingface_utils.h b/engine/utils/huggingface_utils.h index e5c74a6e1..fde5d11b2 100644 --- a/engine/utils/huggingface_utils.h +++ b/engine/utils/huggingface_utils.h @@ -311,4 +311,24 @@ inline std::optional GetDefaultBranch( return std::nullopt; } } + +inline std::optional GetModelAuthorCortexsoHub( + const std::string& model_name) { + try { + auto remote_yml = curl_utils::ReadRemoteYaml(GetMetadataUrl(model_name)); + + if (remote_yml.has_error()) { + return std::nullopt; + } + + auto metadata = remote_yml.value(); + auto author = metadata["author"]; + if (author.IsDefined()) { + return author.as(); + } + return std::nullopt; + } catch (const std::exception& e) { + return std::nullopt; + } +} } // namespace huggingface_utils diff --git a/engine/utils/url_parser.h b/engine/utils/url_parser.h index 244b13719..4802ba1a1 100644 --- a/engine/utils/url_parser.h +++ b/engine/utils/url_parser.h @@ -69,8 +69,11 @@ const std::regex url_regex( R"(^(([^:\/?#]+):)?(//([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?)", std::regex::extended); -inline void SplitPathParams(const std::string& input, +inline bool SplitPathParams(const std::string& input, std::vector& pathList) { + if (input.find("//") != std::string::npos) { + return false; + } // split the path by '/' std::string token; std::istringstream tokenStream(input); @@ -80,6 +83,7 @@ inline void SplitPathParams(const std::string& input, } pathList.push_back(token); } + return true; } inline cpp::result FromUrlString( @@ -105,7 +109,9 @@ inline cpp::result FromUrlString( } else if (counter == hostAndPortIndex) { url.host = res; // TODO: split the port for completeness } else if (counter == pathIndex) { - SplitPathParams(res, url.pathParams); + if (!SplitPathParams(res, url.pathParams)) { + return cpp::fail("Malformed URL: " + urlString); + } } else if (counter == queryIndex) { // TODO: implement } From c8503da4cc09c4ba17341bce4387e841ab3bfdfd Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 27 Feb 2025 11:58:10 +0700 Subject: [PATCH 6/7] chore: refactor background tasks processing (#2031) Co-authored-by: sangjanai --- engine/controllers/models.cc | 9 +++-- engine/main.cc | 8 +++-- engine/services/hardware_service.cc | 4 +-- engine/services/model_service.cc | 54 +++++++++++++++++++++++++++-- engine/services/model_service.h | 20 +++++++---- engine/utils/task_queue.h | 45 ++++++++++++++++++++++++ 6 files changed, 120 insertions(+), 20 deletions(-) create mode 100644 engine/utils/task_queue.h diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index 7439a5df5..86b749ce6 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -218,11 +218,10 @@ void Models::ListModel( obj["id"] = model_entry.model; obj["model"] = model_entry.model; obj["status"] = "downloaded"; - // TODO(sang) Temporarily remove 
this estimation - // auto es = model_service_->GetEstimation(model_entry.model); - // if (es.has_value() && !!es.value()) { - // obj["recommendation"] = hardware::ToJson(*(es.value())); - // } + auto es = model_service_->GetEstimation(model_entry.model); + if (es.has_value()) { + obj["recommendation"] = hardware::ToJson(*es); + } data.append(std::move(obj)); yaml_handler.Reset(); } else if (model_config.engine == kPythonEngine) { diff --git a/engine/main.cc b/engine/main.cc index 122ea094a..2f60916a6 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -37,6 +37,7 @@ #include "utils/file_manager_utils.h" #include "utils/logging_utils.h" #include "utils/system_info_utils.h" +#include "utils/task_queue.h" #if defined(__APPLE__) && defined(__MACH__) #include // for dirname() @@ -177,8 +178,11 @@ void RunServer(std::optional host, std::optional port, download_service, dylib_path_manager, db_service); auto inference_svc = std::make_shared(engine_service); auto model_src_svc = std::make_shared(db_service); - auto model_service = std::make_shared( - db_service, hw_service, download_service, inference_svc, engine_service); + cortex::TaskQueue task_queue( + std::min(2u, std::thread::hardware_concurrency()), "background_task"); + auto model_service = + std::make_shared(db_service, hw_service, download_service, + inference_svc, engine_service, task_queue); inference_svc->SetModelService(model_service); auto file_watcher_srv = std::make_shared( diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc index 88a5df6b0..817ab320b 100644 --- a/engine/services/hardware_service.cc +++ b/engine/services/hardware_service.cc @@ -207,9 +207,6 @@ bool HardwareService::Restart(const std::string& host, int port) { if (!TryConnectToServer(host, port)) { return false; } - std::cout << "Server started" << std::endl; - std::cout << "API Documentation available at: http://" << host << ":" - << port << std::endl; } #endif @@ -348,6 +345,7 @@ void HardwareService::UpdateHardwareInfos() { return false; return true; }; + auto res = db_service_->AddHardwareEntry( HwEntry{.uuid = gpu.uuid, .type = "gpu", diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index aeef54605..c13f7cf19 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -143,6 +143,21 @@ cpp::result GetDownloadTask( } } // namespace +ModelService::ModelService(std::shared_ptr db_service, + std::shared_ptr hw_service, + std::shared_ptr download_service, + std::shared_ptr inference_service, + std::shared_ptr engine_svc, + cortex::TaskQueue& task_queue) + : db_service_(db_service), + hw_service_(hw_service), + download_service_{download_service}, + inference_svc_(inference_service), + engine_svc_(engine_svc), + task_queue_(task_queue) { + ProcessBgrTasks(); +}; + void ModelService::ForceIndexingModelList() { CTL_INF("Force indexing model list"); @@ -331,8 +346,17 @@ cpp::result ModelService::HandleDownloadUrlAsync( return download_service_->AddTask(downloadTask, on_finished); } +std::optional ModelService::GetEstimation( + const std::string& model_handle) { + std::lock_guard l(es_mtx_); + if (auto it = es_.find(model_handle); it != es_.end()) { + return it->second; + } + return std::nullopt; +} + cpp::result, std::string> -ModelService::GetEstimation(const std::string& model_handle, +ModelService::EstimateModel(const std::string& model_handle, const std::string& kv_cache, int n_batch, int n_ubatch) { namespace fs = std::filesystem; @@ -548,7 +572,7 @@ 
ModelService::DownloadModelFromCortexsoAsync( // Close the file pyvenv_cfg.close(); // Add executable permission to python - set_permission_utils::SetExecutePermissionsRecursive(venv_path); + (void)set_permission_utils::SetExecutePermissionsRecursive(venv_path); } else { CTL_ERR("Failed to extract venv.zip"); }; @@ -828,7 +852,7 @@ cpp::result ModelService::StartModel( CTL_WRN("Error: " + res.error()); for (auto& depend : depends) { if (depend != model_handle) { - StopModel(depend); + auto sr = StopModel(depend); } } return cpp::fail("Model failed to start dependency '" + depend + @@ -1390,4 +1414,28 @@ std::string ModelService::GetEngineByModelId( auto mc = yaml_handler.GetModelConfig(); CTL_DBG(mc.engine); return mc.engine; +} + +void ModelService::ProcessBgrTasks() { + CTL_INF("Start processing background tasks") + auto cb = [this] { + CTL_DBG("Estimate model resource usage"); + auto list_entry = db_service_->LoadModelList(); + if (list_entry) { + for (const auto& model_entry : list_entry.value()) { + // Only process local models + if (model_entry.status == cortex::db::ModelStatus::Downloaded) { + auto es = EstimateModel(model_entry.model); + if (es.has_value()) { + std::lock_guard l(es_mtx_); + es_[model_entry.model] = es.value(); + } + } + } + } + }; + + auto clone = cb; + task_queue_.RunInQueue(std::move(cb)); + task_queue_.RunEvery(std::chrono::seconds(10), std::move(clone)); } \ No newline at end of file diff --git a/engine/services/model_service.h b/engine/services/model_service.h index dcf99430f..04c7f240a 100644 --- a/engine/services/model_service.h +++ b/engine/services/model_service.h @@ -10,6 +10,7 @@ #include "services/download_service.h" #include "services/hardware_service.h" #include "utils/hardware/gguf/gguf_file_estimate.h" +#include "utils/task_queue.h" class InferenceService; @@ -35,12 +36,8 @@ class ModelService { std::shared_ptr hw_service, std::shared_ptr download_service, std::shared_ptr inference_service, - std::shared_ptr engine_svc) - : db_service_(db_service), - hw_service_(hw_service), - download_service_{download_service}, - inference_svc_(inference_service), - engine_svc_(engine_svc) {}; + std::shared_ptr engine_svc, + cortex::TaskQueue& task_queue); cpp::result AbortDownloadModel( const std::string& task_id); @@ -81,7 +78,10 @@ class ModelService { bool HasModel(const std::string& id) const; - cpp::result, std::string> GetEstimation( + std::optional GetEstimation( + const std::string& model_handle); + + cpp::result, std::string> EstimateModel( const std::string& model_handle, const std::string& kv_cache = "f16", int n_batch = 2048, int n_ubatch = 2048); @@ -112,6 +112,8 @@ class ModelService { const std::string& model_path, int ngl, int ctx_len, int n_batch = 2048, int n_ubatch = 2048, const std::string& kv_cache_type = "f16"); + void ProcessBgrTasks(); + int GetCpuThreads() const; std::shared_ptr db_service_; @@ -126,4 +128,8 @@ class ModelService { */ std::unordered_map> loaded_model_metadata_map_; + + std::mutex es_mtx_; + std::unordered_map> es_; + cortex::TaskQueue& task_queue_; }; diff --git a/engine/utils/task_queue.h b/engine/utils/task_queue.h new file mode 100644 index 000000000..911a7b307 --- /dev/null +++ b/engine/utils/task_queue.h @@ -0,0 +1,45 @@ +#pragma once +#include +#include +#include "trantor/net/EventLoopThreadPool.h" + +namespace cortex { +class TaskQueue { + public: + TaskQueue(size_t num_threads, const std::string& name) + : ev_loop_pool_( + std::make_unique(num_threads, name)) { + ev_loop_pool_->start(); + } + ~TaskQueue() {} + + 
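+  // Illustrative usage (mirrors main.cc and ModelService in this patch):
+  //   cortex::TaskQueue q(2, "background_task");
+  //   q.RunInQueue([] { /* run once on a pool thread */ });
+  //   q.RunEvery(std::chrono::seconds(10), [] { /* run every 10 s */ });
+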
template + void RunInQueue(Functor&& f) { + if (ev_loop_pool_) { + ev_loop_pool_->getNextLoop()->runInLoop(std::forward(f)); + } + } + + template + uint64_t RunEvery(const std::chrono::duration& interval, + Functor&& cb) { + if (ev_loop_pool_) { + return ev_loop_pool_->getNextLoop()->runEvery(interval, + std::forward(cb)); + } + return 0; + } + + template + uint64_t RunAfter(const std::chrono::duration& delay, Functor&& cb) { + if (ev_loop_pool_) { + return ev_loop_pool_->getNextLoop()->runAfter(delay, + std::forward(cb)); + } + return 0; + } + + private: + std::unique_ptr ev_loop_pool_ = nullptr; +}; +} // namespace cortex \ No newline at end of file From 2691939a30cabee30fb6ac9f7239a549091c22ab Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 27 Feb 2025 12:03:49 +0700 Subject: [PATCH 7/7] fix: correct Linux cpu usage (#2039) Co-authored-by: sangjanai --- engine/services/hardware_service.cc | 2 +- engine/services/hardware_service.h | 3 +- engine/utils/hardware/cpu_info.h | 201 +++++++++++++++++++++++++--- engine/utils/hardware/cpu_usage.h | 162 ---------------------- 4 files changed, 184 insertions(+), 184 deletions(-) delete mode 100644 engine/utils/hardware/cpu_usage.h diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc index 817ab320b..972647b51 100644 --- a/engine/services/hardware_service.cc +++ b/engine/services/hardware_service.cc @@ -52,7 +52,7 @@ HardwareInfo HardwareService::GetHardwareInfo() { }; } - return HardwareInfo{.cpu = cortex::hw::GetCPUInfo(), + return HardwareInfo{.cpu = cpu_info_.GetCPUInfo(), .os = cortex::hw::GetOSInfo(), .ram = cortex::hw::GetMemoryInfo(), .storage = cortex::hw::GetStorageInfo(), diff --git a/engine/services/hardware_service.h b/engine/services/hardware_service.h index a8a619d4f..680cd0961 100644 --- a/engine/services/hardware_service.h +++ b/engine/services/hardware_service.h @@ -1,8 +1,8 @@ #pragma once #include +#include #include #include -#include #include "common/hardware_config.h" #include "database_service.h" @@ -41,4 +41,5 @@ class HardwareService { std::shared_ptr db_service_ = nullptr; std::optional ahc_; std::mutex mtx_; + cortex::hw::CpuInfo cpu_info_; }; \ No newline at end of file diff --git a/engine/utils/hardware/cpu_info.h b/engine/utils/hardware/cpu_info.h index 20ae30bc3..af7a85a4b 100644 --- a/engine/utils/hardware/cpu_info.h +++ b/engine/utils/hardware/cpu_info.h @@ -1,37 +1,198 @@ #pragma once #include +#include +#include +#include #include #include +#include #include + +#ifdef _WIN32 +#include +#include +#pragma comment(lib, "pdh.lib") +#elif defined(__APPLE__) || defined(__MACH__) +#include +#include +#else +#include +#include +#include +#include +#endif #include "common/hardware_common.h" -#include "cpu_usage.h" #include "hwinfo/hwinfo.h" #include "utils/cpuid/cpu_info.h" namespace cortex::hw { -inline CPU GetCPUInfo() { - auto res = hwinfo::getAllCPUs(); - if (res.empty()) - return CPU{}; - auto cpu = res[0]; - cortex::cpuid::CpuInfo inst; +struct Jiffies { + Jiffies() { + working = -1; + all = -1; + } -#if defined(__linux__) - float usage = 0; - for (auto const& c : res) { - usage += c.currentUtilisation(); + Jiffies(int64_t _all, int64_t _working) { + all = _all; + working = _working; } - usage = usage / res.size() * 100; + + int64_t working; + int64_t all; +}; + +struct CpuInfo { + private: + cortex::cpuid::CpuInfo inst; + bool jiffies_initialized = false; + + public: + double GetCPUUsage() { +#if defined(_WIN32) + unsigned long long previous_total_ticks = 0; + unsigned long 
long previous_idle_ticks = 0; + + auto calculate_cpu_load = [&](unsigned long long idle_ticks, + unsigned long long total_ticks) { + unsigned long long total_ticks_since_last_time = + total_ticks - previous_total_ticks; + unsigned long long idle_ticks_since_last_time = + idle_ticks - previous_idle_ticks; + + float ret = 1.0f - ((total_ticks_since_last_time > 0) + ? ((float)idle_ticks_since_last_time) / + total_ticks_since_last_time + : 0); + + previous_total_ticks = total_ticks; + previous_idle_ticks = idle_ticks; + return ret * 100; + }; + + auto file_time_to_int64 = [](const FILETIME& ft) { + return (((unsigned long long)(ft.dwHighDateTime)) << 32) | + ((unsigned long long)ft.dwLowDateTime); + }; + + FILETIME idle_time, kernel_time, user_time; + float res = 0; + constexpr const int kCount = 100; + for (int i = 0; i < kCount; i++) { + res += GetSystemTimes(&idle_time, &kernel_time, &user_time) + ? calculate_cpu_load(file_time_to_int64(idle_time), + file_time_to_int64(kernel_time) + + file_time_to_int64(user_time)) + : -1.0f; + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + return res < 0 ? -1.0f : res / kCount; + +#elif defined(__APPLE__) || defined(__MACH__) + // macOS implementation + host_cpu_load_info_data_t cpu_info; + mach_msg_type_number_t count = HOST_CPU_LOAD_INFO_COUNT; + + static unsigned long long previous_total_ticks = 0; + static unsigned long long previous_idle_ticks = 0; + + if (host_statistics(mach_host_self(), HOST_CPU_LOAD_INFO, + (host_info_t)&cpu_info, &count) == KERN_SUCCESS) { + unsigned long long total_ticks = 0; + for (int i = 0; i < CPU_STATE_MAX; i++) { + total_ticks += cpu_info.cpu_ticks[i]; + } + + unsigned long long idle_ticks = cpu_info.cpu_ticks[CPU_STATE_IDLE]; + + unsigned long long total_ticks_since_last_time = + total_ticks - previous_total_ticks; + unsigned long long idle_ticks_since_last_time = + idle_ticks - previous_idle_ticks; + + double cpu_usage = 1.0f - ((double)idle_ticks_since_last_time / + total_ticks_since_last_time); + + previous_total_ticks = total_ticks; + previous_idle_ticks = idle_ticks; + + return cpu_usage * 100.0; + } + return -1.0; + #else - float usage = GetCPUUsage(); + if (!jiffies_initialized) { + // Sleep 1 sec just for the start cause the usage needs to have a delta value which is depending on the unix file + // read it's just for the init, you don't need to wait if the delta is already created ... 
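+      // In other words: the first call primes the /proc/stat baseline and the
+      // following calls measure against it. Worked example with hypothetical
+      // numbers: if one sample reads all = 1000, working = 400 and the next
+      // reads all = 1100, working = 450, utilization over the interval is
+      // (450 - 400) / (1100 - 1000) = 0.5, reported as 50%.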
+ std::this_thread::sleep_for(std::chrono::duration(1)); + jiffies_initialized = true; + } + + auto get_jiffies = [](int index) -> Jiffies { + std::ifstream filestat("/proc/stat"); + if (!filestat.is_open()) { + return {}; + } + + for (int i = 0; i < index; ++i) { + if (!filestat.ignore(std::numeric_limits::max(), + '\n')) { + break; + } + } + std::string line; + std::getline(filestat, line); + + std::istringstream iss(line); + std::vector results(std::istream_iterator{iss}, + std::istream_iterator()); + + const int64_t jiffies_0 = std::stol(results[1]); + const int64_t jiffies_1 = std::stol(results[2]); + const int64_t jiffies_2 = std::stol(results[3]); + const int64_t jiffies_3 = std::stol(results[4]); + const int64_t jiffies_4 = std::stol(results[5]); + const int64_t jiffies_5 = std::stol(results[6]); + const int64_t jiffies_6 = std::stol(results[7]); + const int64_t jiffies_7 = std::stol(results[8]); + const int64_t jiffies_8 = std::stol(results[9]); + const int64_t jiffies_9 = std::stol(results[10]); + + int64_t all = jiffies_0 + jiffies_1 + jiffies_2 + jiffies_3 + jiffies_4 + + jiffies_5 + jiffies_6 + jiffies_7 + jiffies_8 + jiffies_9; + int64_t working = jiffies_0 + jiffies_1 + jiffies_2; + + return {all, working}; + }; + static Jiffies last = Jiffies(); + + Jiffies current = get_jiffies(0); + + auto total_over_period = static_cast(current.all - last.all); + auto work_over_period = static_cast(current.working - last.working); + + last = current; + + const double utilization = work_over_period / total_over_period; + if (utilization < 0 || utilization > 1 || std::isnan(utilization)) { + return -1.0; + } + return utilization * 100; #endif + } - // float usage = 0; - return CPU{.cores = cpu.numPhysicalCores(), - .arch = std::string(GetArch()), - .model = cpu.modelName(), - .usage = usage, - .instructions = inst.instructions()}; -} + CPU GetCPUInfo() { + auto res = hwinfo::getAllCPUs(); + if (res.empty()) + return CPU{}; + auto cpu = res[0]; + cortex::cpuid::CpuInfo inst; + float usage = GetCPUUsage(); + return CPU{.cores = cpu.numPhysicalCores(), + .arch = std::string(GetArch()), + .model = cpu.modelName(), + .usage = usage, + .instructions = inst.instructions()}; + } +}; } // namespace cortex::hw \ No newline at end of file diff --git a/engine/utils/hardware/cpu_usage.h b/engine/utils/hardware/cpu_usage.h deleted file mode 100644 index 2bba32f09..000000000 --- a/engine/utils/hardware/cpu_usage.h +++ /dev/null @@ -1,162 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include - -#ifdef _WIN32 -#include -#include -#pragma comment(lib, "pdh.lib") -#elif defined(__APPLE__) || defined(__MACH__) -#include -#include -#else -#include -#include -#endif -#include "utils/logging_utils.h" - -namespace cortex::hw { -inline double GetCPUUsage() { -#if defined(_WIN32) - unsigned long long previous_total_ticks = 0; - unsigned long long previous_idle_ticks = 0; - - auto calculate_cpu_load = [&](unsigned long long idle_ticks, - unsigned long long total_ticks) { - unsigned long long total_ticks_since_last_time = - total_ticks - previous_total_ticks; - unsigned long long idle_ticks_since_last_time = - idle_ticks - previous_idle_ticks; - - float ret = 1.0f - ((total_ticks_since_last_time > 0) - ? 
((float)idle_ticks_since_last_time) / - total_ticks_since_last_time - : 0); - - previous_total_ticks = total_ticks; - previous_idle_ticks = idle_ticks; - return ret * 100; - }; - - auto file_time_to_int64 = [](const FILETIME& ft) { - return (((unsigned long long)(ft.dwHighDateTime)) << 32) | - ((unsigned long long)ft.dwLowDateTime); - }; - - FILETIME idle_time, kernel_time, user_time; - float res = 0; - constexpr const int kCount = 100; - for (int i = 0; i < kCount; i++) { - res += GetSystemTimes(&idle_time, &kernel_time, &user_time) - ? calculate_cpu_load(file_time_to_int64(idle_time), - file_time_to_int64(kernel_time) + - file_time_to_int64(user_time)) - : -1.0f; - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - } - return res < 0 ? -1.0f : res / kCount; - -#elif defined(__APPLE__) || defined(__MACH__) - // macOS implementation - host_cpu_load_info_data_t cpu_info; - mach_msg_type_number_t count = HOST_CPU_LOAD_INFO_COUNT; - - static unsigned long long previous_total_ticks = 0; - static unsigned long long previous_idle_ticks = 0; - - if (host_statistics(mach_host_self(), HOST_CPU_LOAD_INFO, - (host_info_t)&cpu_info, &count) == KERN_SUCCESS) { - unsigned long long total_ticks = 0; - for (int i = 0; i < CPU_STATE_MAX; i++) { - total_ticks += cpu_info.cpu_ticks[i]; - } - - unsigned long long idle_ticks = cpu_info.cpu_ticks[CPU_STATE_IDLE]; - - unsigned long long total_ticks_since_last_time = - total_ticks - previous_total_ticks; - unsigned long long idle_ticks_since_last_time = - idle_ticks - previous_idle_ticks; - - double cpu_usage = 1.0f - ((double)idle_ticks_since_last_time / - total_ticks_since_last_time); - - previous_total_ticks = total_ticks; - previous_idle_ticks = idle_ticks; - - return cpu_usage * 100.0; - } - return -1.0; - -#else - // Linux implementation - std::vector last_total_user, last_total_user_low, - last_total_sys, last_total_idle; - - std::vector total_user, total_user_low, total_sys, - total_idle; - - std::ifstream stat_file("/proc/stat"); - if (stat_file.is_open()) { - std::string line; - int cpu_count = 0; - double total_cpu_percentage = 0.0; - - while (std::getline(stat_file, line)) { - if (line.substr(0, 3) != "cpu") - break; // We only want lines that start with "cpu" - - cpu_count++; - std::vector values; - std::istringstream iss(line); - std::string cpu; - iss >> cpu; - unsigned long long value; - while (iss >> value) { - values.push_back(value); - } - - if (values.size() < 4) - continue; - - total_user.push_back(values[0]); - total_user_low.push_back(values[1]); - total_sys.push_back(values[2]); - total_idle.push_back(values[3]); - - if (last_total_user.size() < cpu_count) { - last_total_user.push_back(0); - last_total_user_low.push_back(0); - last_total_sys.push_back(0); - last_total_idle.push_back(0); - } - - unsigned long long total = - (total_user[cpu_count - 1] - last_total_user[cpu_count - 1]) + - (total_user_low[cpu_count - 1] - last_total_user_low[cpu_count - 1]) + - (total_sys[cpu_count - 1] - last_total_sys[cpu_count - 1]); - double percent = total; - total += (total_idle[cpu_count - 1] - last_total_idle[cpu_count - 1]); - percent /= total; - percent *= 100; - - total_cpu_percentage += percent; - - last_total_user[cpu_count - 1] = total_user[cpu_count - 1]; - last_total_user_low[cpu_count - 1] = total_user_low[cpu_count - 1]; - last_total_sys[cpu_count - 1] = total_sys[cpu_count - 1]; - last_total_idle[cpu_count - 1] = total_idle[cpu_count - 1]; - } - stat_file.close(); - - if (cpu_count > 0) { - return total_cpu_percentage / cpu_count; 
// Return average CPU usage - } - } - return -1.0; -#endif -} -} // namespace cortex::hw \ No newline at end of file
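
A note on the new CpuInfo: the Linux path derives usage from the delta between
two /proc/stat samples, so the object has to stay alive between calls; that is
why HardwareService now holds a long-lived cpu_info_ member instead of calling
a free function. A minimal polling sketch (illustrative; the include path is
inferred from the tree layout above):

    #include <chrono>
    #include <iostream>
    #include <thread>
    #include "utils/hardware/cpu_info.h"

    int main() {
      cortex::hw::CpuInfo cpu_info;  // keep one instance alive across samples
      for (int i = 0; i < 5; ++i) {
        // On Linux, each call reports utilization since the previous sample.
        std::cout << "CPU usage: " << cpu_info.GetCPUUsage() << "%\n";
        std::this_thread::sleep_for(std::chrono::seconds(1));
      }
      return 0;
    }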