diff --git a/docs/docs/architecture/cortex-db.mdx b/docs/docs/architecture/cortex-db.mdx
index 52123da4a..7434bc62e 100644
--- a/docs/docs/architecture/cortex-db.mdx
+++ b/docs/docs/architecture/cortex-db.mdx
@@ -15,15 +15,14 @@ import TabItem from "@theme/TabItem";
 This document outlines Cortex database architecture which is designed to store and manage models, engines,
 files and more.
 
-## Tables Structure
-
+## Table Structure
 ### schema Table
-
 The `schema` table is designed to hold schema version for cortex database. Below is the structure of the table:
 
 | Column Name        | Data Type | Description                                             |
 |--------------------|-----------|---------------------------------------------------------|
-| version            | INTEGER   | A unique schema version for database.                   |
+| schema_version     | INTEGER   | A unique schema version for the database.               |
+
 
 ### models Table
 The `models` table is designed to hold metadata about various AI models. Below is the structure of the table:
@@ -53,7 +52,6 @@ The `hardware` table is designed to hold metadata about hardware information. Be
 | activated          | INTEGER   | A boolean value (0 or 1) indicating whether the hardware is activated or not. |
 | priority           | INTEGER   | An integer value representing the priority associated with the hardware. |
 
-
 ### engines Table
 The `engines` table is designed to hold metadata about the different engines available for useage with Cortex.
 Below is the structure of the table:
diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt
index 2e499bc67..be8fe200d 100644
--- a/engine/CMakeLists.txt
+++ b/engine/CMakeLists.txt
@@ -73,7 +73,6 @@ if(CMAKE_BUILD_INJA_TEST)
   add_subdirectory(examples/inja)
 endif()
 
-
 find_package(jsoncpp CONFIG REQUIRED)
 find_package(Drogon CONFIG REQUIRED)
 find_package(yaml-cpp CONFIG REQUIRED)
diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc
index 90feb0b06..c2ef779f1 100644
--- a/engine/cli/commands/server_start_cmd.cc
+++ b/engine/cli/commands/server_start_cmd.cc
@@ -66,16 +66,16 @@ bool ServerStartCmd::Exec(const std::string& host, int port,
   si.cb = sizeof(si);
   ZeroMemory(&pi, sizeof(pi));
   std::wstring params = L"--start-server";
-  params += L" --config_file_path " +
-            file_manager_utils::GetConfigurationPath().wstring();
-  params += L" --data_folder_path " +
-            file_manager_utils::GetCortexDataPath().wstring();
+  params += L" --config_file_path \"" + 
+            file_manager_utils::GetConfigurationPath().wstring() + L"\"";
+  params += L" --data_folder_path \"" +
+            file_manager_utils::GetCortexDataPath().wstring() + L"\"";
   params += L" --loglevel " + cortex::wc::Utf8ToWstring(log_level_);
   std::wstring exe_w = cortex::wc::Utf8ToWstring(exe);
   std::wstring current_path_w =
       file_manager_utils::GetExecutableFolderContainerPath().wstring();
-  std::wstring wcmds = current_path_w + L"/" + exe_w + L" " + params;
-  CTL_DBG("wcmds: " << wcmds);
+  std::wstring wcmds = current_path_w + L"\\" + exe_w + L" " + params;
+  CTL_INF("wcmds: " << wcmds);
   std::vector<wchar_t> mutable_cmds(wcmds.begin(), wcmds.end());
   mutable_cmds.push_back(L'\0');
   // Create child process
diff --git a/engine/common/hardware_common.h b/engine/common/hardware_common.h
index 885e1d4b6..b3822b116 100644
--- a/engine/common/hardware_common.h
+++ b/engine/common/hardware_common.h
@@ -79,6 +79,7 @@ struct GPU {
   int64_t total_vram;
   std::string uuid;
   bool is_activated = true;
+  std::string vendor;
 };
 
 inline Json::Value ToJson(const std::vector<GPU>& gpus) {
@@ -100,7 +101,10 @@ inline Json::Value ToJson(const std::vector<GPU>& gpus) {
     gpu["total_vram"] = gpus[i].total_vram;
     gpu["uuid"] = gpus[i].uuid;
     gpu["activated"] = gpus[i].is_activated;
-    res.append(gpu);
+    gpu["vendor"] = gpus[i].vendor;
+    if (gpus[i].total_vram > 0) {
+      res.append(gpu);
+    }
   }
   return res;
 }
diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc
index 1934df3aa..9c5836d3e 100644
--- a/engine/controllers/engines.cc
+++ b/engine/controllers/engines.cc
@@ -375,17 +375,21 @@ void Engines::UpdateEngine(
           metadata = (*exist_engine).metadata;
         }
 
+        (void)engine_service_->UnloadEngine(engine);
+
         auto upd_res =
             engine_service_->UpsertEngine(engine, type, api_key, url, version,
                                           "all-platforms", status, metadata);
         if (upd_res.has_error()) {
           Json::Value res;
           res["message"] = upd_res.error();
+          CTL_WRN("Error: " << upd_res.error());
           auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
           resp->setStatusCode(k400BadRequest);
           callback(resp);
         } else {
           Json::Value res;
+          CTL_INF("Remote Engine update successfully!");
           res["message"] = "Remote Engine update successfully!";
           auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
           resp->setStatusCode(k200OK);
@@ -394,6 +398,7 @@ void Engines::UpdateEngine(
       } else {
         Json::Value res;
         res["message"] = "Request body is empty!";
+        CTL_WRN("Error: Request body is empty!");
         auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
         resp->setStatusCode(k400BadRequest);
         callback(resp);
diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc
index ac1f55d8f..7cca9624d 100644
--- a/engine/controllers/models.cc
+++ b/engine/controllers/models.cc
@@ -218,10 +218,11 @@ void Models::ListModel(
           obj["id"] = model_entry.model;
           obj["model"] = model_entry.model;
           obj["status"] = "downloaded";
-          auto es = model_service_->GetEstimation(model_entry.model);
-          if (es.has_value() && !!es.value()) {
-            obj["recommendation"] = hardware::ToJson(*(es.value()));
-          }
+          // TODO(sang) Temporarily remove this estimation
+          // auto es = model_service_->GetEstimation(model_entry.model);
+          // if (es.has_value() && !!es.value()) {
+          //   obj["recommendation"] = hardware::ToJson(*(es.value()));
+          // }
           data.append(std::move(obj));
           yaml_handler.Reset();
         } else if (model_config.engine == kPythonEngine) {
diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h
index b2d290d24..754f16593 100644
--- a/engine/cortex-common/EngineI.h
+++ b/engine/cortex-common/EngineI.h
@@ -59,6 +59,9 @@ class EngineI {
                              const std::string& log_path) = 0;
   virtual void SetLogLevel(trantor::Logger::LogLevel logLevel) = 0;
 
+  // Stop inflight chat completion in stream mode
+  virtual void StopInferencing(const std::string& model_id) = 0;
+
   virtual Json::Value GetRemoteModels() = 0;
   virtual void HandleRouteRequest(
       std::shared_ptr<Json::Value> json_body,
@@ -66,7 +69,4 @@ class EngineI {
   virtual void HandleInference(
       std::shared_ptr<Json::Value> json_body,
       std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
-
-  // Stop inflight chat completion in stream mode
-  virtual void StopInferencing(const std::string& model_id) = 0;
 };
diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc
index b843a3d58..1640b7fac 100644
--- a/engine/extensions/remote-engine/remote_engine.cc
+++ b/engine/extensions/remote-engine/remote_engine.cc
@@ -29,8 +29,13 @@ size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb,
   CTL_DBG(chunk);
   Json::Value check_error;
   Json::Reader reader;
-  if (reader.parse(chunk, check_error)) {
+  context->chunks += chunk;
+  if (reader.parse(context->chunks, check_error) ||
+      (reader.parse(chunk, check_error) &&
+       chunk.find("error") != std::string::npos)) {
+    CTL_WRN(context->chunks);
     CTL_WRN(chunk);
+    CTL_INF("Request: " << context->last_request);
     Json::Value status;
     status["is_done"] = true;
     status["has_error"] = true;
@@ -143,7 +148,9 @@ CurlResponse RemoteEngine::MakeStreamingChatCompletionRequest(
       "",
       config.model,
       renderer_,
-      stream_template};
+      stream_template,
+      true,
+      body};
 
   curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str());
   curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h
index 46222467a..6f1b731c6 100644
--- a/engine/extensions/remote-engine/remote_engine.h
+++ b/engine/extensions/remote-engine/remote_engine.h
@@ -25,6 +25,8 @@ struct StreamContext {
   extensions::TemplateRenderer& renderer;
   std::string stream_template;
   bool need_stop = true;
+  std::string last_request;
+  std::string chunks;
 };
 struct CurlResponse {
   std::string body;
diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc
index 8837bd75a..bdc647905 100644
--- a/engine/services/engine_service.cc
+++ b/engine/services/engine_service.cc
@@ -870,10 +870,10 @@ cpp::result<void, std::string> EngineService::UnloadEngine(
     auto unload_opts = EngineI::EngineUnloadOption{};
     e->Unload(unload_opts);
     delete e;
-    engines_.erase(ne);
   } else {
     delete std::get<RemoteEngineI*>(engines_[ne].engine);
   }
+  engines_.erase(ne);
 
   CTL_DBG("Engine unloaded: " + ne);
   return {};
diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc
index 00e48d27a..6b19a0119 100644
--- a/engine/services/hardware_service.cc
+++ b/engine/services/hardware_service.cc
@@ -38,6 +38,7 @@ bool TryConnectToServer(const std::string& host, int port) {
 
 HardwareInfo HardwareService::GetHardwareInfo() {
   // append active state
+  std::lock_guard<std::mutex> l(mtx_);
   auto gpus = cortex::hw::GetGPUInfo();
   auto res = db_service_->LoadHardwareList();
   if (res.has_value()) {
@@ -63,7 +64,8 @@ bool HardwareService::Restart(const std::string& host, int port) {
   namespace luh = logging_utils_helper;
   if (!ahc_)
     return true;
-  auto exe = commands::GetCortexServerBinary();
+  auto exe = file_manager_utils::Subtract(
+      file_manager_utils::GetExecutablePath(), cortex_utils::GetCurrentPath());
   auto get_config_file_path = []() -> std::string {
     if (file_manager_utils::cortex_config_file_path.empty()) {
       return file_manager_utils::GetConfigurationPath().string();
@@ -144,16 +146,17 @@ bool HardwareService::Restart(const std::string& host, int port) {
   ZeroMemory(&pi, sizeof(pi));
   // TODO (sang) write a common function for this and server_start_cmd
   std::wstring params = L"--ignore_cout";
-  params += L" --config_file_path " +
-            file_manager_utils::GetConfigurationPath().wstring();
-  params += L" --data_folder_path " +
-            file_manager_utils::GetCortexDataPath().wstring();
+  params += L" --config_file_path \"" +
+            file_manager_utils::GetConfigurationPath().wstring() + L"\"";
+  params += L" --data_folder_path \"" +
+            file_manager_utils::GetCortexDataPath().wstring() + L"\"";
   params += L" --loglevel " +
             cortex::wc::Utf8ToWstring(luh::LogLevelStr(luh::global_log_level));
-  std::wstring exe_w = cortex::wc::Utf8ToWstring(exe);
+  std::wstring exe_w = exe.wstring();
   std::wstring current_path_w =
       file_manager_utils::GetExecutableFolderContainerPath().wstring();
-  std::wstring wcmds = current_path_w + L"/" + exe_w + L" " + params;
+  std::wstring wcmds = current_path_w + L"\\" + exe_w + L" " + params;
+  CTL_DBG("wcmds: " << wcmds);
   std::vector<wchar_t> mutable_cmds(wcmds.begin(), wcmds.end());
   mutable_cmds.push_back(L'\0');
   // Create child process
@@ -185,7 +188,7 @@ bool HardwareService::Restart(const std::string& host, int port) {
   auto dylib_path_mng = std::make_shared<cortex::DylibPathManager>();
   auto db_srv = std::make_shared<DatabaseService>();
   EngineService(download_srv, dylib_path_mng, db_srv).RegisterEngineLibPath();
-  std::string p = cortex_utils::GetCurrentPath() + "/" + exe;
+  std::string p = cortex_utils::GetCurrentPath() / exe;
   commands.push_back(p);
   commands.push_back("--ignore_cout");
   commands.push_back("--config_file_path");
@@ -486,7 +489,7 @@ std::vector<int> HardwareService::GetCudaConfig() {
   // Map uuid back to nvidia id
   for (auto const& uuid : uuids) {
     for (auto const& ngpu : nvidia_gpus) {
-      if (uuid == ngpu.uuid) {
+      if (ngpu.uuid.find(uuid) != std::string::npos) {
         res.push_back(std::stoi(ngpu.id));
       }
     }
diff --git a/engine/services/hardware_service.h b/engine/services/hardware_service.h
index c002be652..a8a619d4f 100644
--- a/engine/services/hardware_service.h
+++ b/engine/services/hardware_service.h
@@ -2,6 +2,7 @@
 #include <stdint.h>
 #include <string>
 #include <vector>
+#include <mutex>
 
 #include "common/hardware_config.h"
 #include "database_service.h"
@@ -39,4 +40,5 @@ class HardwareService {
  private:
   std::shared_ptr<DatabaseService> db_service_ = nullptr;
   std::optional<cortex::hw::ActivateHardwareConfig> ahc_;
+  std::mutex mtx_;
 };
\ No newline at end of file
diff --git a/engine/services/inference_service.cc b/engine/services/inference_service.cc
index 4ea9ebdfd..713e1e1ee 100644
--- a/engine/services/inference_service.cc
+++ b/engine/services/inference_service.cc
@@ -24,8 +24,12 @@ cpp::result<void, InferResult> InferenceService::HandleChatCompletion(
     auto status = std::get<0>(ir)["status_code"].asInt();
     if (status != drogon::k200OK) {
       CTL_INF("Model is not loaded, start loading it: " << model_id);
-      auto res = LoadModel(saved_models_.at(model_id));
-      // ignore return result
+      // For remote engine, we use the updated configuration
+      if (engine_service_->IsRemoteEngine(engine_type)) {
+        (void)model_service_.lock()->StartModel(model_id, {}, false);
+      } else {
+        (void)LoadModel(saved_models_.at(model_id));
+      }
     }
   }
 
@@ -38,7 +42,7 @@ cpp::result<void, InferResult> InferenceService::HandleChatCompletion(
     LOG_WARN << "Engine is not loaded yet";
     return cpp::fail(std::make_pair(stt, res));
   }
- 
+
   if (!model_id.empty()) {
     if (auto model_service = model_service_.lock()) {
       auto metadata_ptr = model_service->GetCachedModelMetadata(model_id);
@@ -72,7 +76,6 @@ cpp::result<void, InferResult> InferenceService::HandleChatCompletion(
     }
   }
 
-
   CTL_DBG("Json body inference: " + json_body->toStyledString());
 
   auto cb = [q, tool_choice](Json::Value status, Json::Value res) {
@@ -217,10 +220,9 @@ InferResult InferenceService::LoadModel(
     std::get<RemoteEngineI*>(engine_result.value())
         ->LoadModel(json_body, std::move(cb));
   }
-  if (!engine_service_->IsRemoteEngine(engine_type)) {
-    auto model_id = json_body->get("model", "").asString();
-    saved_models_[model_id] = json_body;
-  }
+  // Save model config to reload if needed
+  auto model_id = json_body->get("model", "").asString();
+  saved_models_[model_id] = json_body;
   return std::make_pair(stt, r);
 }
 
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index 94ca4f4cc..a9131c886 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -1233,6 +1233,8 @@ cpp::result<std::optional<std::string>, std::string>
 ModelService::MayFallbackToCpu(const std::string& model_path, int ngl,
                                int ctx_len, int n_batch, int n_ubatch,
                                const std::string& kv_cache_type) {
+  // TODO(sang): temporarily disabled - the code after this return is dead
+  return std::nullopt;
   assert(hw_service_);
   auto hw_info = hw_service_->GetHardwareInfo();
   assert(!!engine_svc_);
diff --git a/engine/services/model_source_service.cc b/engine/services/model_source_service.cc
index e6572be5f..f0e29b8bc 100644
--- a/engine/services/model_source_service.cc
+++ b/engine/services/model_source_service.cc
@@ -475,14 +475,13 @@ ModelSourceService::AddCortexsoRepoBranch(const std::string& model_source,
 
 void ModelSourceService::SyncModelSource() {
   while (running_) {
-    std::this_thread::sleep_for(std::chrono::milliseconds(500));
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
     auto now = std::chrono::system_clock::now();
     auto config = file_manager_utils::GetCortexConfig();
     auto last_check =
         std::chrono::system_clock::time_point(
             std::chrono::milliseconds(config.checkedForSyncHubAt)) +
         std::chrono::hours(1);
-
     if (now > last_check) {
       CTL_DBG("Start to sync cortex.db");
 
diff --git a/engine/utils/file_manager_utils.cc b/engine/utils/file_manager_utils.cc
index c04fef1e6..743c6a641 100644
--- a/engine/utils/file_manager_utils.cc
+++ b/engine/utils/file_manager_utils.cc
@@ -17,14 +17,15 @@
 #endif
 
 namespace file_manager_utils {
-std::filesystem::path GetExecutableFolderContainerPath() {
+
+std::filesystem::path GetExecutablePath() {
 #if defined(__APPLE__) && defined(__MACH__)
   char buffer[1024];
   uint32_t size = sizeof(buffer);
 
   if (_NSGetExecutablePath(buffer, &size) == 0) {
     // CTL_DBG("Executable path: " << buffer);
-    return std::filesystem::path{buffer}.parent_path();
+    return std::filesystem::path{buffer};
   } else {
     CTL_ERR("Failed to get executable path");
     return std::filesystem::current_path();
@@ -35,7 +36,7 @@ std::filesystem::path GetExecutableFolderContainerPath() {
   if (len != -1) {
     buffer[len] = '\0';
     // CTL_DBG("Executable path: " << buffer);
-    return std::filesystem::path{buffer}.parent_path();
+    return std::filesystem::path{buffer};
   } else {
     CTL_ERR("Failed to get executable path");
     return std::filesystem::current_path();
@@ -44,13 +45,17 @@ std::filesystem::path GetExecutableFolderContainerPath() {
   wchar_t buffer[MAX_PATH];
   GetModuleFileNameW(NULL, buffer, MAX_PATH);
   // CTL_DBG("Executable path: " << buffer);
-  return std::filesystem::path{buffer}.parent_path();
+  return std::filesystem::path{buffer};
 #else
   LOG_ERROR << "Unsupported platform!";
   return std::filesystem::current_path();
 #endif
 }
 
+std::filesystem::path GetExecutableFolderContainerPath() {  // Parent folder of GetExecutablePath().
+  return GetExecutablePath().parent_path();  // NOTE(review): on failure GetExecutablePath() returns current_path(), so this yields the CWD's parent - confirm intended.
+}
+
 std::filesystem::path GetHomeDirectoryPath() {
 #ifdef _WIN32
   const wchar_t* homeDir = _wgetenv(L"USERPROFILE");
diff --git a/engine/utils/file_manager_utils.h b/engine/utils/file_manager_utils.h
index 059fe6ae3..f60edf4b3 100644
--- a/engine/utils/file_manager_utils.h
+++ b/engine/utils/file_manager_utils.h
@@ -20,6 +20,8 @@ inline std::string cortex_config_file_path;
 
 inline std::string cortex_data_folder_path;
 
+std::filesystem::path GetExecutablePath();
+
 std::filesystem::path GetExecutableFolderContainerPath();
 
 std::filesystem::path GetHomeDirectoryPath();
diff --git a/engine/utils/hardware/cpu_info.h b/engine/utils/hardware/cpu_info.h
index 396184fa6..20ae30bc3 100644
--- a/engine/utils/hardware/cpu_info.h
+++ b/engine/utils/hardware/cpu_info.h
@@ -16,7 +16,17 @@ inline CPU GetCPUInfo() {
     return CPU{};
   auto cpu = res[0];
   cortex::cpuid::CpuInfo inst;
+
+#if defined(__linux__)
+  float usage = 0;
+  for (auto const& c : res) {
+    usage += c.currentUtilisation();
+  }
+  usage = usage / res.size() * 100;
+#else
   float usage = GetCPUUsage();
+#endif
+
   // float usage = 0;
   return CPU{.cores = cpu.numPhysicalCores(),
              .arch = std::string(GetArch()),
diff --git a/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h b/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h
index 4ef7f51ec..bf1157931 100644
--- a/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h
+++ b/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h
@@ -24,6 +24,21 @@
 #endif
 
 namespace cortex::hw {
+inline std::string GetVendorStr(uint32_t vendor_id) {  // Vendor id -> short vendor name.
+  switch (vendor_id) {  // NOTE(review): ids look like PCI vendor ids - confirm against the caller.
+    case 0x1002:
+      return "AMD";
+    case 0x10DE:
+      return "NVIDIA";
+    case 0x8086:
+      return "INTEL";
+    case 0x13B5:
+      return "ARM";
+    default:
+      return std::to_string(vendor_id);  // Unlisted id: returned as its decimal string.
+  }
+}
+
 #if defined(_WIN32)
 // Definitions of the used function pointers. Add more if you use other ADL APIs
 typedef int (*ADL_MAIN_CONTROL_CREATE)(ADL_MAIN_MALLOC_CALLBACK, int);
@@ -236,190 +251,254 @@ inline int FreeLibrary(void* pLibrary) {
 }
 #endif
 
-inline cpp::result<std::vector<cortex::hw::GPU>, std::string> GetGpuInfoList() {
-  namespace fmu = file_manager_utils;
-  auto get_vulkan_path = [](const std::string& lib_vulkan)
-      -> cpp::result<std::filesystem::path, std::string> {
-    if (std::filesystem::exists(fmu::GetExecutableFolderContainerPath() /
-                                lib_vulkan)) {
-      return fmu::GetExecutableFolderContainerPath() / lib_vulkan;
-      // fallback to deps path
-    } else if (std::filesystem::exists(fmu::GetCortexDataPath() / "deps" /
-                                       lib_vulkan)) {
-      return fmu::GetCortexDataPath() / "deps" / lib_vulkan;
-    } else {
-      CTL_WRN("Could not found " << lib_vulkan);
-      return cpp::fail("Could not found " + lib_vulkan);
-    }
-  };
+class VulkanGpu {
+ private:
+  VulkanGpu() { Init(); }
+#if defined(__linux__) || defined(__APPLE__)
+  void* vulkan_library = nullptr;
+#else
+  HMODULE vulkan_library = nullptr;
+#endif
+  std::vector<cortex::hw::GPU> gpus_;
+
+  bool Init() {
+    namespace fmu = file_manager_utils;
+    auto get_vulkan_path = [](const std::string& lib_vulkan)
+        -> cpp::result<std::filesystem::path, std::string> {
+      if (std::filesystem::exists(fmu::GetExecutableFolderContainerPath() /
+                                  lib_vulkan)) {
+        return fmu::GetExecutableFolderContainerPath() / lib_vulkan;
+        // fallback to deps path
+      } else if (std::filesystem::exists(fmu::GetCortexDataPath() / "deps" /
+                                         lib_vulkan)) {
+        return fmu::GetCortexDataPath() / "deps" / lib_vulkan;
+      } else {
+        CTL_WRN("Could not found " << lib_vulkan);
+        return cpp::fail("Could not found " + lib_vulkan);
+      }
+    };
 // Load the Vulkan library
 #if defined(__APPLE__) && defined(__MACH__)
-  return std::vector<cortex::hw::GPU>{};
+    return true;
 #elif defined(__linux__)
-  auto vulkan_path = get_vulkan_path("libvulkan.so");
-  if (vulkan_path.has_error()) {
-    return cpp::fail(vulkan_path.error());
-  }
-  void* vulkan_library =
-      dlopen(vulkan_path.value().string().c_str(), RTLD_LAZY | RTLD_GLOBAL);
+    auto vulkan_path = get_vulkan_path("libvulkan.so");
+    if (vulkan_path.has_error()) {
+      LOG_INFO << vulkan_path.error();
+      return false;
+    }
+    if (vulkan_library == nullptr) {
+      vulkan_library =
+          dlopen(vulkan_path.value().string().c_str(), RTLD_LAZY | RTLD_GLOBAL);
+    }
 #else
-  auto vulkan_path = get_vulkan_path("vulkan-1.dll");
-  if (vulkan_path.has_error()) {
-    return cpp::fail(vulkan_path.error());
-  }
-  HMODULE vulkan_library = LoadLibraryW(vulkan_path.value().wstring().c_str());
+    auto vulkan_path = get_vulkan_path("vulkan-1.dll");
+    if (vulkan_path.has_error()) {
+      LOG_WARN << vulkan_path.error();
+      return false;
+    }
+    if (vulkan_library == nullptr) {
+      vulkan_library = LoadLibraryW(vulkan_path.value().wstring().c_str());
+    }
 #endif
 #if defined(_WIN32) || defined(_WIN64) || defined(__linux__)
-  if (!vulkan_library) {
-    std::cerr << "Failed to load the Vulkan library." << std::endl;
-    return cpp::fail("Failed to load the Vulkan library.");
-  }
+    if (!vulkan_library) {
+      std::cerr << "Failed to load the Vulkan library." << std::endl;
+      return false;
+    }
 
-  // Get the function pointers for other Vulkan functions
-  auto vkEnumerateInstanceExtensionProperties =
-      reinterpret_cast<PFN_vkEnumerateInstanceExtensionProperties>(
-          GetProcAddress(vulkan_library,
-                         "vkEnumerateInstanceExtensionProperties"));
-  auto vkCreateInstance = reinterpret_cast<PFN_vkCreateInstance>(
-      GetProcAddress(vulkan_library, "vkCreateInstance"));
-  auto vkEnumeratePhysicalDevices =
-      reinterpret_cast<PFN_vkEnumeratePhysicalDevices>(
-          GetProcAddress(vulkan_library, "vkEnumeratePhysicalDevices"));
-  auto vkGetPhysicalDeviceProperties =
-      reinterpret_cast<PFN_vkGetPhysicalDeviceProperties>(
-          GetProcAddress(vulkan_library, "vkGetPhysicalDeviceProperties"));
-  auto vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
-      GetProcAddress(vulkan_library, "vkDestroyInstance"));
-  auto vkGetPhysicalDeviceMemoryProperties =
-      (PFN_vkGetPhysicalDeviceMemoryProperties)GetProcAddress(
-          vulkan_library, "vkGetPhysicalDeviceMemoryProperties");
-
-  auto vkGetPhysicalDeviceProperties2 =
-      (PFN_vkGetPhysicalDeviceProperties2)GetProcAddress(
-          vulkan_library, "vkGetPhysicalDeviceProperties2");
-
-  uint32_t extension_count = 0;
-  vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, nullptr);
-  std::vector<VkExtensionProperties> available_extensions(extension_count);
-  vkEnumerateInstanceExtensionProperties(nullptr, &extension_count,
-                                         available_extensions.data());
-
-  // Create a Vulkan instance
-  VkInstanceCreateInfo instance_create_info = {};
-  instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
-  // If the extension is available, enable it
-  std::vector<const char*> enabled_extensions;
-
-  for (const auto& extension : available_extensions) {
-    enabled_extensions.push_back(extension.extensionName);
-  }
+    // Get the function pointers for other Vulkan functions
+    auto vkEnumerateInstanceExtensionProperties =
+        reinterpret_cast<PFN_vkEnumerateInstanceExtensionProperties>(
+            GetProcAddress(vulkan_library,
+                           "vkEnumerateInstanceExtensionProperties"));
+    auto vkCreateInstance = reinterpret_cast<PFN_vkCreateInstance>(
+        GetProcAddress(vulkan_library, "vkCreateInstance"));
+    auto vkEnumeratePhysicalDevices =
+        reinterpret_cast<PFN_vkEnumeratePhysicalDevices>(
+            GetProcAddress(vulkan_library, "vkEnumeratePhysicalDevices"));
+    auto vkGetPhysicalDeviceProperties =
+        reinterpret_cast<PFN_vkGetPhysicalDeviceProperties>(
+            GetProcAddress(vulkan_library, "vkGetPhysicalDeviceProperties"));
+    auto vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
+        GetProcAddress(vulkan_library, "vkDestroyInstance"));
+    auto vkGetPhysicalDeviceMemoryProperties =
+        (PFN_vkGetPhysicalDeviceMemoryProperties)GetProcAddress(
+            vulkan_library, "vkGetPhysicalDeviceMemoryProperties");
+
+    auto vkGetPhysicalDeviceProperties2 =
+        (PFN_vkGetPhysicalDeviceProperties2)GetProcAddress(
+            vulkan_library, "vkGetPhysicalDeviceProperties2");
+
+    uint32_t extension_count = 0;
+    vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, nullptr);
+    std::vector<VkExtensionProperties> available_extensions(extension_count);
+    vkEnumerateInstanceExtensionProperties(nullptr, &extension_count,
+                                           available_extensions.data());
+
+    // Create a Vulkan instance
+    VkInstanceCreateInfo instance_create_info = {};
+    instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+    // If the extension is available, enable it
+    std::vector<const char*> enabled_extensions;
+
+    for (const auto& extension : available_extensions) {
+      enabled_extensions.push_back(extension.extensionName);
+    }
 
-  instance_create_info.enabledExtensionCount =
-      static_cast<uint32_t>(available_extensions.size());
-  instance_create_info.ppEnabledExtensionNames = enabled_extensions.data();
-
-  VkInstance instance;
-  if (vkCreateInstance == nullptr || vkEnumeratePhysicalDevices == nullptr ||
-      vkGetPhysicalDeviceProperties == nullptr ||
-      vkDestroyInstance == nullptr ||
-      vkGetPhysicalDeviceMemoryProperties == nullptr ||
-      vkGetPhysicalDeviceProperties2 == nullptr) {
-    return cpp::fail("vulkan API is missing!");
-  }
+    instance_create_info.enabledExtensionCount =
+        static_cast<uint32_t>(available_extensions.size());
+    instance_create_info.ppEnabledExtensionNames = enabled_extensions.data();
+
+    VkInstance instance;
+    if (vkCreateInstance == nullptr || vkEnumeratePhysicalDevices == nullptr ||
+        vkGetPhysicalDeviceProperties == nullptr ||
+        vkDestroyInstance == nullptr ||
+        vkGetPhysicalDeviceMemoryProperties == nullptr ||
+        vkGetPhysicalDeviceProperties2 == nullptr) {
+      LOG_WARN << "vulkan API is missing!";
+      return false;
+    }
 
-  VkResult result = vkCreateInstance(&instance_create_info, nullptr, &instance);
-  if (result != VK_SUCCESS) {
-    FreeLibrary(vulkan_library);
-    return cpp::fail("Failed to create a Vulkan instance.");
-  }
+    VkResult result =
+        vkCreateInstance(&instance_create_info, nullptr, &instance);
+    if (result != VK_SUCCESS) {
+      FreeLibrary(vulkan_library);
+      LOG_WARN << "Failed to create a Vulkan instance.";
+      return false;
+    }
 
-  // Get the physical devices
-  uint32_t physical_device_count = 0;
-  result = vkEnumeratePhysicalDevices(instance, &physical_device_count, nullptr);
-  if (result != VK_SUCCESS) {
-    vkDestroyInstance(instance, nullptr);
-    FreeLibrary(vulkan_library);
-    return cpp::fail("Failed to enumerate physical devices.");
-  }
-  std::vector<VkPhysicalDevice> physical_devices(physical_device_count);
-  vkEnumeratePhysicalDevices(instance, &physical_device_count,
-                             physical_devices.data());
-
-  auto uuid_to_string = [](const uint8_t* device_uuid) -> std::string {
-    std::stringstream ss;
-    ss << std::hex << std::setfill('0');
-    for (uint32_t i = 0; i < VK_UUID_SIZE; ++i) {
-      if (i == 4 || i == 6 || i == 8 || i == 10) {
-        ss << '-';
-      }
-      ss << std::setw(2) << static_cast<int>(device_uuid[i]);
+    // Get the physical devices
+    uint32_t physical_device_count = 0;
+    result =
+        vkEnumeratePhysicalDevices(instance, &physical_device_count, nullptr);
+    if (result != VK_SUCCESS) {
+      vkDestroyInstance(instance, nullptr);
+      FreeLibrary(vulkan_library);
+      LOG_WARN << "Failed to enumerate physical devices.";
+      return false;
     }
-    return ss.str();
-  };
+    std::vector<VkPhysicalDevice> physical_devices(physical_device_count);
+    vkEnumeratePhysicalDevices(instance, &physical_device_count,
+                               physical_devices.data());
+
+    auto uuid_to_string = [](const uint8_t* device_uuid) -> std::string {
+      std::stringstream ss;
+      ss << std::hex << std::setfill('0');
+      for (uint32_t i = 0; i < VK_UUID_SIZE; ++i) {
+        if (i == 4 || i == 6 || i == 8 || i == 10) {
+          ss << '-';
+        }
+        ss << std::setw(2) << static_cast<int>(device_uuid[i]);
+      }
+      return ss.str();
+    };
 
-  std::vector<cortex::hw::GPU> gpus;
+    std::vector<cortex::hw::GPU> gpus;
 #if defined(__linux__)
-  auto gpus_usages =
-      GetGpuUsage().value_or(std::unordered_map<int, AmdGpuUsage>{});
+    auto gpus_usages =
+        GetGpuUsage().value_or(std::unordered_map<int, AmdGpuUsage>{});
 #elif defined(_WIN32)
-  auto gpus_usages =
-      GetGpuUsage().value_or(std::unordered_map<std::string, int>{});
+    auto gpus_usages =
+        GetGpuUsage().value_or(std::unordered_map<std::string, int>{});
 #endif
 
-  // Get the device properties
-  size_t id = 0;
-  for (const auto& physical_device : physical_devices) {
-    VkPhysicalDeviceProperties device_properties;
-    vkGetPhysicalDeviceProperties(physical_device, &device_properties);
-
-    VkPhysicalDeviceIDProperties device_id_properties = {};
-    VkPhysicalDeviceProperties2 device_properties2 = {};
-    device_properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
-    device_properties2.pNext = &device_id_properties;
-    device_id_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
-
-    vkGetPhysicalDeviceProperties2(physical_device, &device_properties2);
-
-    VkPhysicalDeviceMemoryProperties memory_properties;
-    vkGetPhysicalDeviceMemoryProperties(physical_device, &memory_properties);
-    int gpu_avail_MiB = 0;
-    for (uint32_t i = 0; i < memory_properties.memoryHeapCount; ++i) {
-      if (memory_properties.memoryHeaps[i].flags &
-          VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
-        gpu_avail_MiB +=
-            memory_properties.memoryHeaps[i].size / (1024ull * 1024ull);
+    // Get the device properties
+    size_t id = 0;
+    for (const auto& physical_device : physical_devices) {
+      VkPhysicalDeviceProperties device_properties;
+      vkGetPhysicalDeviceProperties(physical_device, &device_properties);
+
+      VkPhysicalDeviceIDProperties device_id_properties = {};
+      VkPhysicalDeviceProperties2 device_properties2 = {};
+      device_properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+      device_properties2.pNext = &device_id_properties;
+      device_id_properties.sType =
+          VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
+
+      vkGetPhysicalDeviceProperties2(physical_device, &device_properties2);
+
+      VkPhysicalDeviceMemoryProperties memory_properties;
+      vkGetPhysicalDeviceMemoryProperties(physical_device, &memory_properties);
+      int gpu_avail_MiB = 0;
+      for (uint32_t i = 0; i < memory_properties.memoryHeapCount; ++i) {
+        if (memory_properties.memoryHeaps[i].flags &
+            VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
+          gpu_avail_MiB +=
+              memory_properties.memoryHeaps[i].size / (1024ull * 1024ull);
+        }
       }
-    }
 
-    int64_t total_vram_MiB = 0;
-    int64_t used_vram_MiB = 0;
+      int64_t total_vram_MiB = 0;
+      int64_t used_vram_MiB = 0;
 
 #if defined(__linux__)
-    total_vram_MiB = gpus_usages[device_properties.deviceID].total_vram_MiB;
-    used_vram_MiB = gpus_usages[device_properties.deviceID].used_vram_MiB;
+      total_vram_MiB = gpus_usages[device_properties.deviceID].total_vram_MiB;
+      used_vram_MiB = gpus_usages[device_properties.deviceID].used_vram_MiB;
 #elif defined(_WIN32)
-    total_vram_MiB = gpu_avail_MiB;
-    used_vram_MiB = gpus_usages[device_properties.deviceName];
+      total_vram_MiB = gpu_avail_MiB;
+      used_vram_MiB = gpus_usages[device_properties.deviceName];
+
+#endif
+      int free_vram_MiB =
+          total_vram_MiB > used_vram_MiB ? total_vram_MiB - used_vram_MiB : 0;
+      gpus.emplace_back(cortex::hw::GPU{
+          .id = std::to_string(id),
+          .device_id = device_properties.deviceID,
+          .name = device_properties.deviceName,
+          .version = std::to_string(device_properties.driverVersion),
+          .add_info = cortex::hw::AmdAddInfo{},
+          .free_vram = free_vram_MiB,
+          .total_vram = total_vram_MiB,
+          .uuid = uuid_to_string(device_id_properties.deviceUUID),
+          .vendor = GetVendorStr(device_properties.vendorID)});
+      id++;
+    }
+
+    // Clean up
+    vkDestroyInstance(instance, nullptr);
 
+    gpus_ = gpus;
 #endif
-    int free_vram_MiB =
-        total_vram_MiB > used_vram_MiB ? total_vram_MiB - used_vram_MiB : 0;
-    gpus.emplace_back(cortex::hw::GPU{
-        .id = std::to_string(id),
-        .device_id = device_properties.deviceID,
-        .name = device_properties.deviceName,
-        .version = std::to_string(device_properties.driverVersion),
-        .add_info = cortex::hw::AmdAddInfo{},
-        .free_vram = free_vram_MiB,
-        .total_vram = total_vram_MiB,
-        .uuid = uuid_to_string(device_id_properties.deviceUUID)});
-    id++;
+    return true;
   }
 
-  // Clean up
-  vkDestroyInstance(instance, nullptr);
-  FreeLibrary(vulkan_library);
-  return gpus;
+ public:
+  VulkanGpu(VulkanGpu const&) = delete;  // Non-copyable; use GetInstance().
+  VulkanGpu& operator=(VulkanGpu const&) = delete;
+  ~VulkanGpu() {  // Releases vulkan_library via FreeLibrary when non-null.
+#if defined(_WIN32) || defined(_WIN64) || defined(__linux__)
+    if (vulkan_library)  // NOTE(review): enumerate-failure path frees it too;
+      FreeLibrary(vulkan_library);  // confirm the handle cannot be freed twice.
 #endif
-}
+  }
+
+  static VulkanGpu& GetInstance() {  // Returns the shared VulkanGpu instance.
+    static VulkanGpu vg;  // Constructed once, on first call.
+    return vg;
+  }
+
+  // Recomputes free_vram for each cached GPU and returns a copy of the list.
+  cpp::result<std::vector<cortex::hw::GPU>, std::string> GetGpuInfoList() {
+#if defined(__linux__)
+    auto gpus_usages =
+        GetGpuUsage().value_or(std::unordered_map<int, AmdGpuUsage>{});
+#elif defined(_WIN32)
+    auto gpus_usages =
+        GetGpuUsage().value_or(std::unordered_map<std::string, int>{});
+#endif
+    for (auto& gpu : gpus_) {
+      int64_t total_mib = 0;
+      int64_t used_mib = 0;
+#if defined(__linux__)
+      total_mib = gpus_usages[gpu.device_id].total_vram_MiB;
+      used_mib = gpus_usages[gpu.device_id].used_vram_MiB;
+#elif defined(_WIN32)
+      total_mib = gpu.total_vram;  // capacity, not the stale cached free_vram
+      used_mib = gpus_usages[gpu.name];
+#endif
+      gpu.free_vram = total_mib > used_mib ? total_mib - used_mib : 0;
+    }
+    return gpus_;
+  }
+};
 }  // namespace cortex::hw
\ No newline at end of file
diff --git a/engine/utils/hardware/gpu_info.h b/engine/utils/hardware/gpu_info.h
index 43325bf38..14096d4bb 100644
--- a/engine/utils/hardware/gpu_info.h
+++ b/engine/utils/hardware/gpu_info.h
@@ -9,7 +9,8 @@ namespace cortex::hw {
 
 inline std::vector<GPU> GetGPUInfo() {
   auto nvidia_gpus = system_info_utils::GetGpuInfoList();
-  auto vulkan_gpus = GetGpuInfoList().value_or(std::vector<cortex::hw::GPU>{});
+  auto vulkan_gpus = VulkanGpu::GetInstance().GetGpuInfoList().value_or(
+      std::vector<cortex::hw::GPU>{});
   auto use_vulkan_info = nvidia_gpus.empty();
 
   // In case we have vulkan info, add more information for GPUs
@@ -24,10 +25,20 @@ inline std::vector<GPU> GetGPUInfo() {
             .compute_cap = nvidia_gpus[i].compute_cap.value_or("unknown")};
         vulkan_gpus[j].free_vram = std::stoll(nvidia_gpus[i].vram_free);
         vulkan_gpus[j].total_vram = std::stoll(nvidia_gpus[i].vram_total);
+        vulkan_gpus[j].vendor = nvidia_gpus[i].vendor;
       }
     }
   }
-  
+
+  // Erase invalid GPUs
+  for (std::vector<cortex::hw::GPU>::iterator it = vulkan_gpus.begin();
+       it != vulkan_gpus.end();) {
+    if ((*it).total_vram <= 0)
+      it = vulkan_gpus.erase(it);
+    else
+      ++it;
+  }
+
   if (use_vulkan_info) {
     return vulkan_gpus;
   } else {
@@ -43,7 +54,8 @@ inline std::vector<GPU> GetGPUInfo() {
                       .compute_cap = n.compute_cap.value_or("unknown")},
               .free_vram = std::stoi(n.vram_free),
               .total_vram = std::stoi(n.vram_total),
-              .uuid = n.uuid});
+              .uuid = n.uuid,
+              .vendor = n.vendor});
     }
     return res;
   }
diff --git a/engine/utils/system_info_utils.cc b/engine/utils/system_info_utils.cc
index 673a2a7b7..1448a4b36 100644
--- a/engine/utils/system_info_utils.cc
+++ b/engine/utils/system_info_utils.cc
@@ -124,17 +124,16 @@ std::vector<GpuInfo> GetGpuInfoList() {
 
     while (
         std::regex_search(search_start, output.cend(), match, gpu_info_reg)) {
-      GpuInfo gpuInfo = {
-          match[1].str(),                        // id
-          match[2].str(),                        // vram_total
-          match[3].str(),                        // vram_free
-          match[4].str(),                        // name
-          GetGpuArch(match[4].str()),            // arch
-          driver_version,                        // driver_version
-          cuda_version,                          // cuda_driver_version
-          need_fallback ? "0" : match[5].str(),  // compute_cap
-          match[rg_count].str()                  // uuid
-      };
+      GpuInfo gpuInfo = {match[1].str(),              // id
+                         match[2].str(),              // vram_total
+                         match[3].str(),              // vram_free
+                         match[4].str(),              // name
+                         GetGpuArch(match[4].str()),  // arch
+                         driver_version,              // driver_version
+                         cuda_version,                // cuda_driver_version
+                         need_fallback ? "0" : match[5].str(),  // compute_cap
+                         match[rg_count].str(),                 // uuid
+                         "NVIDIA"};
       gpuInfoList.push_back(gpuInfo);
       search_start = match.suffix().first;
     }
diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h
index 79d5db2e1..54eaed8c9 100644
--- a/engine/utils/system_info_utils.h
+++ b/engine/utils/system_info_utils.h
@@ -121,6 +121,7 @@ struct GpuInfo {
   std::optional<std::string> cuda_driver_version;
   std::optional<std::string> compute_cap;
   std::string uuid;
+  std::string vendor;
 };
 
 std::vector<GpuInfo> GetGpuInfoListVulkan();