14 changes: 7 additions & 7 deletions docs/docs/architecture/cortex-db.mdx
@@ -16,12 +16,12 @@ This document outlines Cortex database architecture which is designed to store a
files and more.

## Table Structure
### schema Table
The `schema` table is designed to hold schema version for cortex database. Below is the structure of the table:
### schema_version Table
The `schema_version` table is designed to hold the schema version for the cortex database. Below is the structure of the table:

| Column Name | Data Type | Description |
|--------------------|-----------|---------------------------------------------------------|
| schema_version | INTEGER | A unique schema version for database. |
| version | INTEGER | A unique schema version for database. |

### models Table
The `models` table is designed to hold metadata about various AI models. Below is the structure of the table:
@@ -63,10 +63,10 @@ Below is the structure of the table:
| api_key | TEXT | |
| url | TEXT | |
| version | TEXT | The current version of the engine. |
| variant | TEXT | |
| variant | TEXT | A string that specifies the specific configuration or build variant of the engine. |
| status | TEXT | Current status of the engine (e.g., "downloaded", "downloadable"). |
| metadata | TEXT | Additional metadata or information about the engine. |
| date_ceated | TEXT | Date when the engine was downloaded. |
| date_created | TEXT | Date when the engine was downloaded. |
| date_updated | TEXT | Date when the engine was last updated. |

### files Table
@@ -78,5 +78,5 @@ The `files` table is designed to hold metadata about objects downloaded via Cortex
| object | TEXT | The type of object. |
| purpose | TEXT | Purpose of the file. |
| filename | TEXT | The name of the file. |
| created_at | INTEGER | Date when file was created |
| bytes | INTEGER | |
| created_at | INTEGER | Date when file was created. |
| bytes | INTEGER | Size of the file on disk in bytes. |
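
The INTEGER/TEXT column types above suggest a SQLite store. As a minimal sketch of how the `schema_version` table could be read, assuming the standard SQLite C API and a placeholder database path (the real path and connection handling live elsewhere in Cortex):

```cpp
#include <sqlite3.h>
#include <cstdio>

// Minimal sketch: read the single row from the schema_version table.
// "cortex.db" is a placeholder path, not the actual location used by Cortex.
int main() {
  sqlite3* db = nullptr;
  if (sqlite3_open("cortex.db", &db) != SQLITE_OK) {
    std::fprintf(stderr, "open failed: %s\n", sqlite3_errmsg(db));
    return 1;
  }
  sqlite3_stmt* stmt = nullptr;
  if (sqlite3_prepare_v2(db, "SELECT version FROM schema_version;", -1, &stmt,
                         nullptr) == SQLITE_OK &&
      sqlite3_step(stmt) == SQLITE_ROW) {
    std::printf("schema version: %d\n", sqlite3_column_int(stmt, 0));
  }
  sqlite3_finalize(stmt);
  sqlite3_close(db);
  return 0;
}
```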
11 changes: 11 additions & 0 deletions engine/common/hardware_common.h
@@ -69,6 +69,16 @@ struct NvidiaAddInfo {
};
struct AmdAddInfo {};
using GPUAddInfo = std::variant<NvidiaAddInfo, AmdAddInfo>;

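// Values mirror Vulkan's VkPhysicalDeviceType (OTHER = 0 through CPU = 4, MAX_ENUM = 0x7FFFFFFF).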
enum class GpuType {
kGpuTypeOther = 0,
kGpuTypeIntegrated = 1,
kGpuTypeDiscrete = 2,
kGpuTypeVirtual = 3,
kGpuTypeCpu = 4,
kGpuTypeMaxEnum = 0x7FFFFFFF
};

struct GPU {
std::string id;
uint32_t device_id;
@@ -80,6 +90,7 @@ struct GPU {
std::string uuid;
bool is_activated = true;
std::string vendor;
GpuType gpu_type;
};

inline Json::Value ToJson(const std::vector<GPU>& gpus) {
9 changes: 4 additions & 5 deletions engine/controllers/models.cc
@@ -218,11 +218,10 @@ void Models::ListModel(
obj["id"] = model_entry.model;
obj["model"] = model_entry.model;
obj["status"] = "downloaded";
// TODO(sang) Temporarily remove this estimation
// auto es = model_service_->GetEstimation(model_entry.model);
// if (es.has_value() && !!es.value()) {
// obj["recommendation"] = hardware::ToJson(*(es.value()));
// }
auto es = model_service_->GetEstimation(model_entry.model);
if (es.has_value()) {
obj["recommendation"] = hardware::ToJson(*es);
}
data.append(std::move(obj));
yaml_handler.Reset();
} else if (model_config.engine == kPythonEngine) {
8 changes: 6 additions & 2 deletions engine/main.cc
@@ -37,6 +37,7 @@
#include "utils/file_manager_utils.h"
#include "utils/logging_utils.h"
#include "utils/system_info_utils.h"
#include "utils/task_queue.h"

#if defined(__APPLE__) && defined(__MACH__)
#include <libgen.h> // for dirname()
@@ -177,8 +178,11 @@ void RunServer(std::optional<std::string> host, std::optional<int> port,
download_service, dylib_path_manager, db_service);
auto inference_svc = std::make_shared<InferenceService>(engine_service);
auto model_src_svc = std::make_shared<ModelSourceService>(db_service);
auto model_service = std::make_shared<ModelService>(
db_service, hw_service, download_service, inference_svc, engine_service);
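  // Cap the background task queue at two worker threads.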
cortex::TaskQueue task_queue(
std::min(2u, std::thread::hardware_concurrency()), "background_task");
auto model_service =
std::make_shared<ModelService>(db_service, hw_service, download_service,
inference_svc, engine_service, task_queue);
inference_svc->SetModelService(model_service);

auto file_watcher_srv = std::make_shared<FileWatcherService>(
42 changes: 28 additions & 14 deletions engine/services/hardware_service.cc
@@ -52,7 +52,7 @@ HardwareInfo HardwareService::GetHardwareInfo() {
};
}

return HardwareInfo{.cpu = cortex::hw::GetCPUInfo(),
return HardwareInfo{.cpu = cpu_info_.GetCPUInfo(),
.os = cortex::hw::GetOSInfo(),
.ram = cortex::hw::GetMemoryInfo(),
.storage = cortex::hw::GetStorageInfo(),
@@ -207,9 +207,6 @@ bool HardwareService::Restart(const std::string& host, int port) {
if (!TryConnectToServer(host, port)) {
return false;
}
std::cout << "Server started" << std::endl;
std::cout << "API Documentation available at: http://" << host << ":"
<< port << std::endl;
}

#endif
Expand Down Expand Up @@ -322,23 +319,40 @@ void HardwareService::UpdateHardwareInfos() {
}
}
CTL_INF("Activated GPUs before: " << debug_b);
auto has_nvidia = [&gpus] {
for (auto const& g : gpus) {
if (g.vendor == cortex::hw::kNvidiaStr) {
return true;
}
}
return false;
}();

for (auto const& gpu : gpus) {
// ignore error
// Note: only support NVIDIA for now, so hardware_id = software_id
if (db_service_->HasHardwareEntry(gpu.uuid)) {
auto res = db_service_->UpdateHardwareEntry(gpu.uuid, std::stoi(gpu.id),
std::stoi(gpu.id));
std::stoi(gpu.id));
if (res.has_error()) {
CTL_WRN(res.error());
}
} else {
auto res =
db_service_->AddHardwareEntry(HwEntry{.uuid = gpu.uuid,
.type = "gpu",
.hardware_id = std::stoi(gpu.id),
.software_id = std::stoi(gpu.id),
.activated = true,
.priority = INT_MAX});
// iGPU should be deactivated by default
// Only activate Nvidia GPUs if both AMD and Nvidia GPUs exist
auto activated = [&gpu, &gpus, has_nvidia] {
if (gpu.gpu_type != cortex::hw::GpuType::kGpuTypeDiscrete)
return false;
if (has_nvidia && gpu.vendor != cortex::hw::kNvidiaStr)
return false;
return true;
};

auto res = db_service_->AddHardwareEntry(
HwEntry{.uuid = gpu.uuid,
.type = "gpu",
.hardware_id = std::stoi(gpu.id),
.software_id = std::stoi(gpu.id),
.activated = activated(),
.priority = INT_MAX});
if (res.has_error()) {
CTL_WRN(res.error());
}
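Since the default-activation rule added above is easy to miss inside `UpdateHardwareInfos()`, here is a self-contained restatement of it. The `GPU`/`GpuType` definitions below are trimmed stand-ins for the real ones in `engine/common/hardware_common.h`, and the exact value of `kNvidiaStr` is an assumption:

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Trimmed stand-ins for the definitions in engine/common/hardware_common.h.
enum class GpuType { kGpuTypeOther, kGpuTypeIntegrated, kGpuTypeDiscrete };
constexpr const char* kNvidiaStr = "NVIDIA";  // assumed value

struct GPU {
  std::string vendor;
  GpuType gpu_type;
};

// Default-activation rule from UpdateHardwareInfos():
//  - integrated and other GPU types start deactivated;
//  - if any NVIDIA GPU is present, non-NVIDIA discrete GPUs start deactivated too.
bool ActivatedByDefault(const GPU& gpu, bool has_nvidia) {
  if (gpu.gpu_type != GpuType::kGpuTypeDiscrete)
    return false;
  if (has_nvidia && gpu.vendor != kNvidiaStr)
    return false;
  return true;
}

int main() {
  std::vector<GPU> gpus = {{"NVIDIA", GpuType::kGpuTypeDiscrete},
                           {"AMD", GpuType::kGpuTypeDiscrete},
                           {"Intel", GpuType::kGpuTypeIntegrated}};
  bool has_nvidia = std::any_of(gpus.begin(), gpus.end(), [](const GPU& g) {
    return g.vendor == kNvidiaStr;
  });
  for (const auto& g : gpus) {
    std::cout << g.vendor << " -> "
              << (ActivatedByDefault(g, has_nvidia) ? "activated" : "deactivated")
              << '\n';
  }
  // Prints: NVIDIA -> activated, AMD -> deactivated, Intel -> deactivated.
}
```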
3 changes: 2 additions & 1 deletion engine/services/hardware_service.h
@@ -1,8 +1,8 @@
#pragma once
#include <stdint.h>
#include <mutex>
#include <string>
#include <vector>
#include <mutex>

#include "common/hardware_config.h"
#include "database_service.h"
@@ -41,4 +41,5 @@ class HardwareService {
std::shared_ptr<DatabaseService> db_service_ = nullptr;
std::optional<cortex::hw::ActivateHardwareConfig> ahc_;
std::mutex mtx_;
cortex::hw::CpuInfo cpu_info_;
};
63 changes: 60 additions & 3 deletions engine/services/model_service.cc
@@ -143,6 +143,21 @@ cpp::result<DownloadTask, std::string> GetDownloadTask(
}
} // namespace

ModelService::ModelService(std::shared_ptr<DatabaseService> db_service,
std::shared_ptr<HardwareService> hw_service,
std::shared_ptr<DownloadService> download_service,
std::shared_ptr<InferenceService> inference_service,
std::shared_ptr<EngineServiceI> engine_svc,
cortex::TaskQueue& task_queue)
: db_service_(db_service),
hw_service_(hw_service),
download_service_{download_service},
inference_svc_(inference_service),
engine_svc_(engine_svc),
task_queue_(task_queue) {
ProcessBgrTasks();
};

void ModelService::ForceIndexingModelList() {
CTL_INF("Force indexing model list");

@@ -331,8 +346,17 @@ cpp::result<DownloadTask, std::string> ModelService::HandleDownloadUrlAsync(
return download_service_->AddTask(downloadTask, on_finished);
}

std::optional<hardware::Estimation> ModelService::GetEstimation(
const std::string& model_handle) {
std::lock_guard l(es_mtx_);
if (auto it = es_.find(model_handle); it != es_.end()) {
return it->second;
}
return std::nullopt;
}

cpp::result<std::optional<hardware::Estimation>, std::string>
ModelService::GetEstimation(const std::string& model_handle,
ModelService::EstimateModel(const std::string& model_handle,
const std::string& kv_cache, int n_batch,
int n_ubatch) {
namespace fs = std::filesystem;
@@ -548,7 +572,7 @@ ModelService::DownloadModelFromCortexsoAsync(
// Close the file
pyvenv_cfg.close();
// Add executable permission to python
set_permission_utils::SetExecutePermissionsRecursive(venv_path);
(void)set_permission_utils::SetExecutePermissionsRecursive(venv_path);
} else {
CTL_ERR("Failed to extract venv.zip");
};
@@ -828,7 +852,7 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
CTL_WRN("Error: " + res.error());
for (auto& depend : depends) {
if (depend != model_handle) {
StopModel(depend);
auto sr = StopModel(depend);
}
}
return cpp::fail("Model failed to start dependency '" + depend +
@@ -945,6 +969,11 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(

json_helper::MergeJson(json_data, params_override);

// Set default cpu_threads if it is not configured
if (!json_data.isMember("cpu_threads")) {
json_data["cpu_threads"] = GetCpuThreads();
}

// Set the latest ctx_len
if (ctx_len) {
json_data["ctx_len"] =
@@ -1329,6 +1358,10 @@ ModelService::MayFallbackToCpu(const std::string& model_path, int ngl,
return warning;
}

int ModelService::GetCpuThreads() const {
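  // Default to half the hardware threads, but never fewer than one.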
return std::max(std::thread::hardware_concurrency() / 2, 1u);
}

cpp::result<std::shared_ptr<ModelMetadata>, std::string>
ModelService::GetModelMetadata(const std::string& model_id) const {
if (model_id.empty()) {
@@ -1381,4 +1414,28 @@ std::string ModelService::GetEngineByModelId(
auto mc = yaml_handler.GetModelConfig();
CTL_DBG(mc.engine);
return mc.engine;
}

void ModelService::ProcessBgrTasks() {
CTL_INF("Start processing background tasks")
auto cb = [this] {
CTL_DBG("Estimate model resource usage");
auto list_entry = db_service_->LoadModelList();
if (list_entry) {
for (const auto& model_entry : list_entry.value()) {
// Only process local models
if (model_entry.status == cortex::db::ModelStatus::Downloaded) {
auto es = EstimateModel(model_entry.model);
if (es.has_value()) {
std::lock_guard l(es_mtx_);
es_[model_entry.model] = es.value();
}
}
}
}
};

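  // Run the estimation pass once immediately, then refresh it every 10 seconds.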
auto clone = cb;
task_queue_.RunInQueue(std::move(cb));
task_queue_.RunEvery(std::chrono::seconds(10), std::move(clone));
}
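The diff exercises `cortex::TaskQueue` only through `RunInQueue` (run once on a worker thread) and `RunEvery` (re-run on an interval). A minimal stand-in covering just that surface, not the real implementation in `utils/task_queue.h`, might look like:

```cpp
#include <chrono>
#include <condition_variable>
#include <cstdio>
#include <functional>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <vector>

class TaskQueue {
 public:
  TaskQueue(unsigned n_threads, std::string /*name*/) {
    if (n_threads == 0) n_threads = 1;
    for (unsigned i = 0; i < n_threads; ++i)
      workers_.emplace_back([this] { Loop(); });
  }
  ~TaskQueue() {
    {
      std::lock_guard<std::mutex> l(m_);
      stop_ = true;
    }
    cv_.notify_all();
    for (auto& t : timers_) t.join();    // timers stop enqueueing first
    for (auto& w : workers_) w.join();   // workers drain what is left
  }
  void RunInQueue(std::function<void()> f) {
    {
      std::lock_guard<std::mutex> l(m_);
      q_.push(std::move(f));
    }
    cv_.notify_all();
  }
  void RunEvery(std::chrono::seconds d, std::function<void()> f) {
    // One timer thread per periodic task: re-enqueue f on every tick until stop.
    timers_.emplace_back([this, d, f = std::move(f)] {
      std::unique_lock<std::mutex> l(m_);
      while (!cv_.wait_for(l, d, [this] { return stop_; })) {
        q_.push(f);
        cv_.notify_all();
      }
    });
  }

 private:
  void Loop() {
    for (;;) {
      std::function<void()> task;
      {
        std::unique_lock<std::mutex> l(m_);
        cv_.wait(l, [this] { return stop_ || !q_.empty(); });
        if (stop_ && q_.empty()) return;
        task = std::move(q_.front());
        q_.pop();
      }
      task();
    }
  }
  std::mutex m_;
  std::condition_variable cv_;
  std::queue<std::function<void()>> q_;
  std::vector<std::thread> workers_;
  std::vector<std::thread> timers_;
  bool stop_ = false;
};

int main() {
  TaskQueue q(2, "background_task");
  q.RunInQueue([] { std::puts("one-off estimation pass"); });
  q.RunEvery(std::chrono::seconds(1), [] { std::puts("periodic refresh"); });
  std::this_thread::sleep_for(std::chrono::milliseconds(3500));
}  // ~TaskQueue stops the timer threads and joins the workers
```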
22 changes: 15 additions & 7 deletions engine/services/model_service.h
@@ -10,6 +10,7 @@
#include "services/download_service.h"
#include "services/hardware_service.h"
#include "utils/hardware/gguf/gguf_file_estimate.h"
#include "utils/task_queue.h"

class InferenceService;

@@ -35,12 +36,8 @@ class ModelService {
std::shared_ptr<HardwareService> hw_service,
std::shared_ptr<DownloadService> download_service,
std::shared_ptr<InferenceService> inference_service,
std::shared_ptr<EngineServiceI> engine_svc)
: db_service_(db_service),
hw_service_(hw_service),
download_service_{download_service},
inference_svc_(inference_service),
engine_svc_(engine_svc) {};
std::shared_ptr<EngineServiceI> engine_svc,
cortex::TaskQueue& task_queue);

cpp::result<std::string, std::string> AbortDownloadModel(
const std::string& task_id);
@@ -81,7 +78,10 @@

bool HasModel(const std::string& id) const;

cpp::result<std::optional<hardware::Estimation>, std::string> GetEstimation(
std::optional<hardware::Estimation> GetEstimation(
const std::string& model_handle);

cpp::result<std::optional<hardware::Estimation>, std::string> EstimateModel(
const std::string& model_handle, const std::string& kv_cache = "f16",
int n_batch = 2048, int n_ubatch = 2048);

@@ -112,6 +112,10 @@
const std::string& model_path, int ngl, int ctx_len, int n_batch = 2048,
int n_ubatch = 2048, const std::string& kv_cache_type = "f16");

void ProcessBgrTasks();

int GetCpuThreads() const;

std::shared_ptr<DatabaseService> db_service_;
std::shared_ptr<HardwareService> hw_service_;
std::shared_ptr<DownloadService> download_service_;
@@ -124,4 +128,8 @@
*/
std::unordered_map<std::string, std::shared_ptr<ModelMetadata>>
loaded_model_metadata_map_;

std::mutex es_mtx_;
std::unordered_map<std::string, std::optional<hardware::Estimation>> es_;
cortex::TaskQueue& task_queue_;
};