diff --git a/docs/docs/architecture/cortex-db.mdx b/docs/docs/architecture/cortex-db.mdx index 52123da4a..7434bc62e 100644 --- a/docs/docs/architecture/cortex-db.mdx +++ b/docs/docs/architecture/cortex-db.mdx @@ -15,15 +15,14 @@ import TabItem from "@theme/TabItem"; This document outlines Cortex database architecture which is designed to store and manage models, engines, files and more. -## Tables Structure - +## Table Structure ### schema Table - The `schema` table is designed to hold schema version for cortex database. Below is the structure of the table: | Column Name | Data Type | Description | |--------------------|-----------|---------------------------------------------------------| -| version | INTEGER | A unique schema version for database. | +| schema_version | INTEGER | A unique schema version for database. | + ### models Table The `models` table is designed to hold metadata about various AI models. Below is the structure of the table: @@ -53,7 +52,6 @@ The `hardware` table is designed to hold metadata about hardware information. Be | activated | INTEGER | A boolean value (0 or 1) indicating whether the hardware is activated or not. | | priority | INTEGER | An integer value representing the priority associated with the hardware. | - ### engines Table The `engines` table is designed to hold metadata about the different engines available for useage with Cortex. Below is the structure of the table: diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index 2e499bc67..be8fe200d 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -73,7 +73,6 @@ if(CMAKE_BUILD_INJA_TEST) add_subdirectory(examples/inja) endif() - find_package(jsoncpp CONFIG REQUIRED) find_package(Drogon CONFIG REQUIRED) find_package(yaml-cpp CONFIG REQUIRED) diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc index 90feb0b06..c2ef779f1 100644 --- a/engine/cli/commands/server_start_cmd.cc +++ b/engine/cli/commands/server_start_cmd.cc @@ -66,16 +66,16 @@ bool ServerStartCmd::Exec(const std::string& host, int port, si.cb = sizeof(si); ZeroMemory(&pi, sizeof(pi)); std::wstring params = L"--start-server"; - params += L" --config_file_path " + - file_manager_utils::GetConfigurationPath().wstring(); - params += L" --data_folder_path " + - file_manager_utils::GetCortexDataPath().wstring(); + params += L" --config_file_path \"" + + file_manager_utils::GetConfigurationPath().wstring() + L"\""; + params += L" --data_folder_path \"" + + file_manager_utils::GetCortexDataPath().wstring() + L"\""; params += L" --loglevel " + cortex::wc::Utf8ToWstring(log_level_); std::wstring exe_w = cortex::wc::Utf8ToWstring(exe); std::wstring current_path_w = file_manager_utils::GetExecutableFolderContainerPath().wstring(); - std::wstring wcmds = current_path_w + L"/" + exe_w + L" " + params; - CTL_DBG("wcmds: " << wcmds); + std::wstring wcmds = current_path_w + L"\\" + exe_w + L" " + params; + CTL_INF("wcmds: " << wcmds); std::vector mutable_cmds(wcmds.begin(), wcmds.end()); mutable_cmds.push_back(L'\0'); // Create child process diff --git a/engine/common/hardware_common.h b/engine/common/hardware_common.h index 885e1d4b6..b3822b116 100644 --- a/engine/common/hardware_common.h +++ b/engine/common/hardware_common.h @@ -79,6 +79,7 @@ struct GPU { int64_t total_vram; std::string uuid; bool is_activated = true; + std::string vendor; }; inline Json::Value ToJson(const std::vector& gpus) { @@ -100,7 +101,10 @@ inline Json::Value ToJson(const std::vector& gpus) { gpu["total_vram"] = gpus[i].total_vram; gpu["uuid"] = gpus[i].uuid; gpu["activated"] = gpus[i].is_activated; - res.append(gpu); + gpu["vendor"] = gpus[i].vendor; + if (gpus[i].total_vram > 0) { + res.append(gpu); + } } return res; } diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 1934df3aa..9c5836d3e 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -375,17 +375,21 @@ void Engines::UpdateEngine( metadata = (*exist_engine).metadata; } + (void)engine_service_->UnloadEngine(engine); + auto upd_res = engine_service_->UpsertEngine(engine, type, api_key, url, version, "all-platforms", status, metadata); if (upd_res.has_error()) { Json::Value res; res["message"] = upd_res.error(); + CTL_WRN("Error: " << upd_res.error()); auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k400BadRequest); callback(resp); } else { Json::Value res; + CTL_INF("Remote Engine update successfully!"); res["message"] = "Remote Engine update successfully!"; auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k200OK); @@ -394,6 +398,7 @@ void Engines::UpdateEngine( } else { Json::Value res; res["message"] = "Request body is empty!"; + CTL_WRN("Error: Request body is empty!"); auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode(k400BadRequest); callback(resp); diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index ac1f55d8f..7cca9624d 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -218,10 +218,11 @@ void Models::ListModel( obj["id"] = model_entry.model; obj["model"] = model_entry.model; obj["status"] = "downloaded"; - auto es = model_service_->GetEstimation(model_entry.model); - if (es.has_value() && !!es.value()) { - obj["recommendation"] = hardware::ToJson(*(es.value())); - } + // TODO(sang) Temporarily remove this estimation + // auto es = model_service_->GetEstimation(model_entry.model); + // if (es.has_value() && !!es.value()) { + // obj["recommendation"] = hardware::ToJson(*(es.value())); + // } data.append(std::move(obj)); yaml_handler.Reset(); } else if (model_config.engine == kPythonEngine) { diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h index b2d290d24..754f16593 100644 --- a/engine/cortex-common/EngineI.h +++ b/engine/cortex-common/EngineI.h @@ -59,6 +59,9 @@ class EngineI { const std::string& log_path) = 0; virtual void SetLogLevel(trantor::Logger::LogLevel logLevel) = 0; + // Stop inflight chat completion in stream mode + virtual void StopInferencing(const std::string& model_id) = 0; + virtual Json::Value GetRemoteModels() = 0; virtual void HandleRouteRequest( std::shared_ptr json_body, @@ -66,7 +69,4 @@ class EngineI { virtual void HandleInference( std::shared_ptr json_body, std::function&& callback) = 0; - - // Stop inflight chat completion in stream mode - virtual void StopInferencing(const std::string& model_id) = 0; }; diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc index b843a3d58..1640b7fac 100644 --- a/engine/extensions/remote-engine/remote_engine.cc +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -29,8 +29,13 @@ size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, CTL_DBG(chunk); Json::Value check_error; Json::Reader reader; - if (reader.parse(chunk, check_error)) { + context->chunks += chunk; + if (reader.parse(context->chunks, check_error) || + (reader.parse(chunk, check_error) && + chunk.find("error") != std::string::npos)) { + CTL_WRN(context->chunks); CTL_WRN(chunk); + CTL_INF("Request: " << context->last_request); Json::Value status; status["is_done"] = true; status["has_error"] = true; @@ -143,7 +148,9 @@ CurlResponse RemoteEngine::MakeStreamingChatCompletionRequest( "", config.model, renderer_, - stream_template}; + stream_template, + true, + body}; curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h index 46222467a..6f1b731c6 100644 --- a/engine/extensions/remote-engine/remote_engine.h +++ b/engine/extensions/remote-engine/remote_engine.h @@ -25,6 +25,8 @@ struct StreamContext { extensions::TemplateRenderer& renderer; std::string stream_template; bool need_stop = true; + std::string last_request; + std::string chunks; }; struct CurlResponse { std::string body; diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 8837bd75a..bdc647905 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -870,10 +870,10 @@ cpp::result EngineService::UnloadEngine( auto unload_opts = EngineI::EngineUnloadOption{}; e->Unload(unload_opts); delete e; - engines_.erase(ne); } else { delete std::get(engines_[ne].engine); } + engines_.erase(ne); CTL_DBG("Engine unloaded: " + ne); return {}; diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc index 00e48d27a..6b19a0119 100644 --- a/engine/services/hardware_service.cc +++ b/engine/services/hardware_service.cc @@ -38,6 +38,7 @@ bool TryConnectToServer(const std::string& host, int port) { HardwareInfo HardwareService::GetHardwareInfo() { // append active state + std::lock_guard l(mtx_); auto gpus = cortex::hw::GetGPUInfo(); auto res = db_service_->LoadHardwareList(); if (res.has_value()) { @@ -63,7 +64,8 @@ bool HardwareService::Restart(const std::string& host, int port) { namespace luh = logging_utils_helper; if (!ahc_) return true; - auto exe = commands::GetCortexServerBinary(); + auto exe = file_manager_utils::Subtract( + file_manager_utils::GetExecutablePath(), cortex_utils::GetCurrentPath()); auto get_config_file_path = []() -> std::string { if (file_manager_utils::cortex_config_file_path.empty()) { return file_manager_utils::GetConfigurationPath().string(); @@ -144,16 +146,17 @@ bool HardwareService::Restart(const std::string& host, int port) { ZeroMemory(&pi, sizeof(pi)); // TODO (sang) write a common function for this and server_start_cmd std::wstring params = L"--ignore_cout"; - params += L" --config_file_path " + - file_manager_utils::GetConfigurationPath().wstring(); - params += L" --data_folder_path " + - file_manager_utils::GetCortexDataPath().wstring(); + params += L" --config_file_path \"" + + file_manager_utils::GetConfigurationPath().wstring() + L"\""; + params += L" --data_folder_path \"" + + file_manager_utils::GetCortexDataPath().wstring() + L"\""; params += L" --loglevel " + cortex::wc::Utf8ToWstring(luh::LogLevelStr(luh::global_log_level)); - std::wstring exe_w = cortex::wc::Utf8ToWstring(exe); + std::wstring exe_w = exe.wstring(); std::wstring current_path_w = file_manager_utils::GetExecutableFolderContainerPath().wstring(); - std::wstring wcmds = current_path_w + L"/" + exe_w + L" " + params; + std::wstring wcmds = current_path_w + L"\\" + exe_w + L" " + params; + CTL_DBG("wcmds: " << wcmds); std::vector mutable_cmds(wcmds.begin(), wcmds.end()); mutable_cmds.push_back(L'\0'); // Create child process @@ -185,7 +188,7 @@ bool HardwareService::Restart(const std::string& host, int port) { auto dylib_path_mng = std::make_shared(); auto db_srv = std::make_shared(); EngineService(download_srv, dylib_path_mng, db_srv).RegisterEngineLibPath(); - std::string p = cortex_utils::GetCurrentPath() + "/" + exe; + std::string p = cortex_utils::GetCurrentPath() / exe; commands.push_back(p); commands.push_back("--ignore_cout"); commands.push_back("--config_file_path"); @@ -486,7 +489,7 @@ std::vector HardwareService::GetCudaConfig() { // Map uuid back to nvidia id for (auto const& uuid : uuids) { for (auto const& ngpu : nvidia_gpus) { - if (uuid == ngpu.uuid) { + if (ngpu.uuid.find(uuid) != std::string::npos) { res.push_back(std::stoi(ngpu.id)); } } diff --git a/engine/services/hardware_service.h b/engine/services/hardware_service.h index c002be652..a8a619d4f 100644 --- a/engine/services/hardware_service.h +++ b/engine/services/hardware_service.h @@ -2,6 +2,7 @@ #include #include #include +#include #include "common/hardware_config.h" #include "database_service.h" @@ -39,4 +40,5 @@ class HardwareService { private: std::shared_ptr db_service_ = nullptr; std::optional ahc_; + std::mutex mtx_; }; \ No newline at end of file diff --git a/engine/services/inference_service.cc b/engine/services/inference_service.cc index 4ea9ebdfd..713e1e1ee 100644 --- a/engine/services/inference_service.cc +++ b/engine/services/inference_service.cc @@ -24,8 +24,12 @@ cpp::result InferenceService::HandleChatCompletion( auto status = std::get<0>(ir)["status_code"].asInt(); if (status != drogon::k200OK) { CTL_INF("Model is not loaded, start loading it: " << model_id); - auto res = LoadModel(saved_models_.at(model_id)); - // ignore return result + // For remote engine, we use the updated configuration + if (engine_service_->IsRemoteEngine(engine_type)) { + (void)model_service_.lock()->StartModel(model_id, {}, false); + } else { + (void)LoadModel(saved_models_.at(model_id)); + } } } @@ -38,7 +42,7 @@ cpp::result InferenceService::HandleChatCompletion( LOG_WARN << "Engine is not loaded yet"; return cpp::fail(std::make_pair(stt, res)); } - + if (!model_id.empty()) { if (auto model_service = model_service_.lock()) { auto metadata_ptr = model_service->GetCachedModelMetadata(model_id); @@ -72,7 +76,6 @@ cpp::result InferenceService::HandleChatCompletion( } } - CTL_DBG("Json body inference: " + json_body->toStyledString()); auto cb = [q, tool_choice](Json::Value status, Json::Value res) { @@ -217,10 +220,9 @@ InferResult InferenceService::LoadModel( std::get(engine_result.value()) ->LoadModel(json_body, std::move(cb)); } - if (!engine_service_->IsRemoteEngine(engine_type)) { - auto model_id = json_body->get("model", "").asString(); - saved_models_[model_id] = json_body; - } + // Save model config to reload if needed + auto model_id = json_body->get("model", "").asString(); + saved_models_[model_id] = json_body; return std::make_pair(stt, r); } diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 94ca4f4cc..a9131c886 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -1233,6 +1233,8 @@ cpp::result, std::string> ModelService::MayFallbackToCpu(const std::string& model_path, int ngl, int ctx_len, int n_batch, int n_ubatch, const std::string& kv_cache_type) { + // TODO(sang) temporary disable this function + return std::nullopt; assert(hw_service_); auto hw_info = hw_service_->GetHardwareInfo(); assert(!!engine_svc_); diff --git a/engine/services/model_source_service.cc b/engine/services/model_source_service.cc index e6572be5f..f0e29b8bc 100644 --- a/engine/services/model_source_service.cc +++ b/engine/services/model_source_service.cc @@ -475,14 +475,13 @@ ModelSourceService::AddCortexsoRepoBranch(const std::string& model_source, void ModelSourceService::SyncModelSource() { while (running_) { - std::this_thread::sleep_for(std::chrono::milliseconds(500)); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); auto now = std::chrono::system_clock::now(); auto config = file_manager_utils::GetCortexConfig(); auto last_check = std::chrono::system_clock::time_point( std::chrono::milliseconds(config.checkedForSyncHubAt)) + std::chrono::hours(1); - if (now > last_check) { CTL_DBG("Start to sync cortex.db"); diff --git a/engine/utils/file_manager_utils.cc b/engine/utils/file_manager_utils.cc index c04fef1e6..743c6a641 100644 --- a/engine/utils/file_manager_utils.cc +++ b/engine/utils/file_manager_utils.cc @@ -17,14 +17,15 @@ #endif namespace file_manager_utils { -std::filesystem::path GetExecutableFolderContainerPath() { + +std::filesystem::path GetExecutablePath() { #if defined(__APPLE__) && defined(__MACH__) char buffer[1024]; uint32_t size = sizeof(buffer); if (_NSGetExecutablePath(buffer, &size) == 0) { // CTL_DBG("Executable path: " << buffer); - return std::filesystem::path{buffer}.parent_path(); + return std::filesystem::path{buffer}; } else { CTL_ERR("Failed to get executable path"); return std::filesystem::current_path(); @@ -35,7 +36,7 @@ std::filesystem::path GetExecutableFolderContainerPath() { if (len != -1) { buffer[len] = '\0'; // CTL_DBG("Executable path: " << buffer); - return std::filesystem::path{buffer}.parent_path(); + return std::filesystem::path{buffer}; } else { CTL_ERR("Failed to get executable path"); return std::filesystem::current_path(); @@ -44,13 +45,17 @@ std::filesystem::path GetExecutableFolderContainerPath() { wchar_t buffer[MAX_PATH]; GetModuleFileNameW(NULL, buffer, MAX_PATH); // CTL_DBG("Executable path: " << buffer); - return std::filesystem::path{buffer}.parent_path(); + return std::filesystem::path{buffer}; #else LOG_ERROR << "Unsupported platform!"; return std::filesystem::current_path(); #endif } +std::filesystem::path GetExecutableFolderContainerPath() { + return GetExecutablePath().parent_path(); +} + std::filesystem::path GetHomeDirectoryPath() { #ifdef _WIN32 const wchar_t* homeDir = _wgetenv(L"USERPROFILE"); diff --git a/engine/utils/file_manager_utils.h b/engine/utils/file_manager_utils.h index 059fe6ae3..f60edf4b3 100644 --- a/engine/utils/file_manager_utils.h +++ b/engine/utils/file_manager_utils.h @@ -20,6 +20,8 @@ inline std::string cortex_config_file_path; inline std::string cortex_data_folder_path; +std::filesystem::path GetExecutablePath(); + std::filesystem::path GetExecutableFolderContainerPath(); std::filesystem::path GetHomeDirectoryPath(); diff --git a/engine/utils/hardware/cpu_info.h b/engine/utils/hardware/cpu_info.h index 396184fa6..20ae30bc3 100644 --- a/engine/utils/hardware/cpu_info.h +++ b/engine/utils/hardware/cpu_info.h @@ -16,7 +16,17 @@ inline CPU GetCPUInfo() { return CPU{}; auto cpu = res[0]; cortex::cpuid::CpuInfo inst; + +#if defined(__linux__) + float usage = 0; + for (auto const& c : res) { + usage += c.currentUtilisation(); + } + usage = usage / res.size() * 100; +#else float usage = GetCPUUsage(); +#endif + // float usage = 0; return CPU{.cores = cpu.numPhysicalCores(), .arch = std::string(GetArch()), diff --git a/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h b/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h index 4ef7f51ec..bf1157931 100644 --- a/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h +++ b/engine/utils/hardware/gpu/vulkan/vulkan_gpu.h @@ -24,6 +24,21 @@ #endif namespace cortex::hw { +inline std::string GetVendorStr(uint32_t vendor_id) { + switch (vendor_id) { + case 0x1002: + return "AMD"; + case 0x10DE: + return "NVIDIA"; + case 0x8086: + return "INTEL"; + case 0x13B5: + return "ARM"; + default: + return std::to_string(vendor_id); + } +} + #if defined(_WIN32) // Definitions of the used function pointers. Add more if you use other ADL APIs typedef int (*ADL_MAIN_CONTROL_CREATE)(ADL_MAIN_MALLOC_CALLBACK, int); @@ -236,190 +251,254 @@ inline int FreeLibrary(void* pLibrary) { } #endif -inline cpp::result, std::string> GetGpuInfoList() { - namespace fmu = file_manager_utils; - auto get_vulkan_path = [](const std::string& lib_vulkan) - -> cpp::result { - if (std::filesystem::exists(fmu::GetExecutableFolderContainerPath() / - lib_vulkan)) { - return fmu::GetExecutableFolderContainerPath() / lib_vulkan; - // fallback to deps path - } else if (std::filesystem::exists(fmu::GetCortexDataPath() / "deps" / - lib_vulkan)) { - return fmu::GetCortexDataPath() / "deps" / lib_vulkan; - } else { - CTL_WRN("Could not found " << lib_vulkan); - return cpp::fail("Could not found " + lib_vulkan); - } - }; +class VulkanGpu { + private: + VulkanGpu() { Init(); } +#if defined(__linux__) || defined(__APPLE__) + void* vulkan_library = nullptr; +#else + HMODULE vulkan_library = nullptr; +#endif + std::vector gpus_; + + bool Init() { + namespace fmu = file_manager_utils; + auto get_vulkan_path = [](const std::string& lib_vulkan) + -> cpp::result { + if (std::filesystem::exists(fmu::GetExecutableFolderContainerPath() / + lib_vulkan)) { + return fmu::GetExecutableFolderContainerPath() / lib_vulkan; + // fallback to deps path + } else if (std::filesystem::exists(fmu::GetCortexDataPath() / "deps" / + lib_vulkan)) { + return fmu::GetCortexDataPath() / "deps" / lib_vulkan; + } else { + CTL_WRN("Could not found " << lib_vulkan); + return cpp::fail("Could not found " + lib_vulkan); + } + }; // Load the Vulkan library #if defined(__APPLE__) && defined(__MACH__) - return std::vector{}; + return true; #elif defined(__linux__) - auto vulkan_path = get_vulkan_path("libvulkan.so"); - if (vulkan_path.has_error()) { - return cpp::fail(vulkan_path.error()); - } - void* vulkan_library = - dlopen(vulkan_path.value().string().c_str(), RTLD_LAZY | RTLD_GLOBAL); + auto vulkan_path = get_vulkan_path("libvulkan.so"); + if (vulkan_path.has_error()) { + LOG_INFO << vulkan_path.error(); + return false; + } + if (vulkan_library == nullptr) { + vulkan_library = + dlopen(vulkan_path.value().string().c_str(), RTLD_LAZY | RTLD_GLOBAL); + } #else - auto vulkan_path = get_vulkan_path("vulkan-1.dll"); - if (vulkan_path.has_error()) { - return cpp::fail(vulkan_path.error()); - } - HMODULE vulkan_library = LoadLibraryW(vulkan_path.value().wstring().c_str()); + auto vulkan_path = get_vulkan_path("vulkan-1.dll"); + if (vulkan_path.has_error()) { + LOG_WARN << vulkan_path.error(); + return false; + } + if (vulkan_library == nullptr) { + vulkan_library = LoadLibraryW(vulkan_path.value().wstring().c_str()); + } #endif #if defined(_WIN32) || defined(_WIN64) || defined(__linux__) - if (!vulkan_library) { - std::cerr << "Failed to load the Vulkan library." << std::endl; - return cpp::fail("Failed to load the Vulkan library."); - } + if (!vulkan_library) { + std::cerr << "Failed to load the Vulkan library." << std::endl; + return false; + } - // Get the function pointers for other Vulkan functions - auto vkEnumerateInstanceExtensionProperties = - reinterpret_cast( - GetProcAddress(vulkan_library, - "vkEnumerateInstanceExtensionProperties")); - auto vkCreateInstance = reinterpret_cast( - GetProcAddress(vulkan_library, "vkCreateInstance")); - auto vkEnumeratePhysicalDevices = - reinterpret_cast( - GetProcAddress(vulkan_library, "vkEnumeratePhysicalDevices")); - auto vkGetPhysicalDeviceProperties = - reinterpret_cast( - GetProcAddress(vulkan_library, "vkGetPhysicalDeviceProperties")); - auto vkDestroyInstance = reinterpret_cast( - GetProcAddress(vulkan_library, "vkDestroyInstance")); - auto vkGetPhysicalDeviceMemoryProperties = - (PFN_vkGetPhysicalDeviceMemoryProperties)GetProcAddress( - vulkan_library, "vkGetPhysicalDeviceMemoryProperties"); - - auto vkGetPhysicalDeviceProperties2 = - (PFN_vkGetPhysicalDeviceProperties2)GetProcAddress( - vulkan_library, "vkGetPhysicalDeviceProperties2"); - - uint32_t extension_count = 0; - vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, nullptr); - std::vector available_extensions(extension_count); - vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, - available_extensions.data()); - - // Create a Vulkan instance - VkInstanceCreateInfo instance_create_info = {}; - instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - // If the extension is available, enable it - std::vector enabled_extensions; - - for (const auto& extension : available_extensions) { - enabled_extensions.push_back(extension.extensionName); - } + // Get the function pointers for other Vulkan functions + auto vkEnumerateInstanceExtensionProperties = + reinterpret_cast( + GetProcAddress(vulkan_library, + "vkEnumerateInstanceExtensionProperties")); + auto vkCreateInstance = reinterpret_cast( + GetProcAddress(vulkan_library, "vkCreateInstance")); + auto vkEnumeratePhysicalDevices = + reinterpret_cast( + GetProcAddress(vulkan_library, "vkEnumeratePhysicalDevices")); + auto vkGetPhysicalDeviceProperties = + reinterpret_cast( + GetProcAddress(vulkan_library, "vkGetPhysicalDeviceProperties")); + auto vkDestroyInstance = reinterpret_cast( + GetProcAddress(vulkan_library, "vkDestroyInstance")); + auto vkGetPhysicalDeviceMemoryProperties = + (PFN_vkGetPhysicalDeviceMemoryProperties)GetProcAddress( + vulkan_library, "vkGetPhysicalDeviceMemoryProperties"); + + auto vkGetPhysicalDeviceProperties2 = + (PFN_vkGetPhysicalDeviceProperties2)GetProcAddress( + vulkan_library, "vkGetPhysicalDeviceProperties2"); + + uint32_t extension_count = 0; + vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, nullptr); + std::vector available_extensions(extension_count); + vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, + available_extensions.data()); + + // Create a Vulkan instance + VkInstanceCreateInfo instance_create_info = {}; + instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + // If the extension is available, enable it + std::vector enabled_extensions; + + for (const auto& extension : available_extensions) { + enabled_extensions.push_back(extension.extensionName); + } - instance_create_info.enabledExtensionCount = - static_cast(available_extensions.size()); - instance_create_info.ppEnabledExtensionNames = enabled_extensions.data(); - - VkInstance instance; - if (vkCreateInstance == nullptr || vkEnumeratePhysicalDevices == nullptr || - vkGetPhysicalDeviceProperties == nullptr || - vkDestroyInstance == nullptr || - vkGetPhysicalDeviceMemoryProperties == nullptr || - vkGetPhysicalDeviceProperties2 == nullptr) { - return cpp::fail("vulkan API is missing!"); - } + instance_create_info.enabledExtensionCount = + static_cast(available_extensions.size()); + instance_create_info.ppEnabledExtensionNames = enabled_extensions.data(); + + VkInstance instance; + if (vkCreateInstance == nullptr || vkEnumeratePhysicalDevices == nullptr || + vkGetPhysicalDeviceProperties == nullptr || + vkDestroyInstance == nullptr || + vkGetPhysicalDeviceMemoryProperties == nullptr || + vkGetPhysicalDeviceProperties2 == nullptr) { + LOG_WARN << "vulkan API is missing!"; + return false; + } - VkResult result = vkCreateInstance(&instance_create_info, nullptr, &instance); - if (result != VK_SUCCESS) { - FreeLibrary(vulkan_library); - return cpp::fail("Failed to create a Vulkan instance."); - } + VkResult result = + vkCreateInstance(&instance_create_info, nullptr, &instance); + if (result != VK_SUCCESS) { + FreeLibrary(vulkan_library); + LOG_WARN << "Failed to create a Vulkan instance."; + return false; + } - // Get the physical devices - uint32_t physical_device_count = 0; - result = vkEnumeratePhysicalDevices(instance, &physical_device_count, nullptr); - if (result != VK_SUCCESS) { - vkDestroyInstance(instance, nullptr); - FreeLibrary(vulkan_library); - return cpp::fail("Failed to enumerate physical devices."); - } - std::vector physical_devices(physical_device_count); - vkEnumeratePhysicalDevices(instance, &physical_device_count, - physical_devices.data()); - - auto uuid_to_string = [](const uint8_t* device_uuid) -> std::string { - std::stringstream ss; - ss << std::hex << std::setfill('0'); - for (uint32_t i = 0; i < VK_UUID_SIZE; ++i) { - if (i == 4 || i == 6 || i == 8 || i == 10) { - ss << '-'; - } - ss << std::setw(2) << static_cast(device_uuid[i]); + // Get the physical devices + uint32_t physical_device_count = 0; + result = + vkEnumeratePhysicalDevices(instance, &physical_device_count, nullptr); + if (result != VK_SUCCESS) { + vkDestroyInstance(instance, nullptr); + FreeLibrary(vulkan_library); + LOG_WARN << "Failed to enumerate physical devices."; + return false; } - return ss.str(); - }; + std::vector physical_devices(physical_device_count); + vkEnumeratePhysicalDevices(instance, &physical_device_count, + physical_devices.data()); + + auto uuid_to_string = [](const uint8_t* device_uuid) -> std::string { + std::stringstream ss; + ss << std::hex << std::setfill('0'); + for (uint32_t i = 0; i < VK_UUID_SIZE; ++i) { + if (i == 4 || i == 6 || i == 8 || i == 10) { + ss << '-'; + } + ss << std::setw(2) << static_cast(device_uuid[i]); + } + return ss.str(); + }; - std::vector gpus; + std::vector gpus; #if defined(__linux__) - auto gpus_usages = - GetGpuUsage().value_or(std::unordered_map{}); + auto gpus_usages = + GetGpuUsage().value_or(std::unordered_map{}); #elif defined(_WIN32) - auto gpus_usages = - GetGpuUsage().value_or(std::unordered_map{}); + auto gpus_usages = + GetGpuUsage().value_or(std::unordered_map{}); #endif - // Get the device properties - size_t id = 0; - for (const auto& physical_device : physical_devices) { - VkPhysicalDeviceProperties device_properties; - vkGetPhysicalDeviceProperties(physical_device, &device_properties); - - VkPhysicalDeviceIDProperties device_id_properties = {}; - VkPhysicalDeviceProperties2 device_properties2 = {}; - device_properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - device_properties2.pNext = &device_id_properties; - device_id_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; - - vkGetPhysicalDeviceProperties2(physical_device, &device_properties2); - - VkPhysicalDeviceMemoryProperties memory_properties; - vkGetPhysicalDeviceMemoryProperties(physical_device, &memory_properties); - int gpu_avail_MiB = 0; - for (uint32_t i = 0; i < memory_properties.memoryHeapCount; ++i) { - if (memory_properties.memoryHeaps[i].flags & - VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) { - gpu_avail_MiB += - memory_properties.memoryHeaps[i].size / (1024ull * 1024ull); + // Get the device properties + size_t id = 0; + for (const auto& physical_device : physical_devices) { + VkPhysicalDeviceProperties device_properties; + vkGetPhysicalDeviceProperties(physical_device, &device_properties); + + VkPhysicalDeviceIDProperties device_id_properties = {}; + VkPhysicalDeviceProperties2 device_properties2 = {}; + device_properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + device_properties2.pNext = &device_id_properties; + device_id_properties.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; + + vkGetPhysicalDeviceProperties2(physical_device, &device_properties2); + + VkPhysicalDeviceMemoryProperties memory_properties; + vkGetPhysicalDeviceMemoryProperties(physical_device, &memory_properties); + int gpu_avail_MiB = 0; + for (uint32_t i = 0; i < memory_properties.memoryHeapCount; ++i) { + if (memory_properties.memoryHeaps[i].flags & + VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) { + gpu_avail_MiB += + memory_properties.memoryHeaps[i].size / (1024ull * 1024ull); + } } - } - int64_t total_vram_MiB = 0; - int64_t used_vram_MiB = 0; + int64_t total_vram_MiB = 0; + int64_t used_vram_MiB = 0; #if defined(__linux__) - total_vram_MiB = gpus_usages[device_properties.deviceID].total_vram_MiB; - used_vram_MiB = gpus_usages[device_properties.deviceID].used_vram_MiB; + total_vram_MiB = gpus_usages[device_properties.deviceID].total_vram_MiB; + used_vram_MiB = gpus_usages[device_properties.deviceID].used_vram_MiB; #elif defined(_WIN32) - total_vram_MiB = gpu_avail_MiB; - used_vram_MiB = gpus_usages[device_properties.deviceName]; + total_vram_MiB = gpu_avail_MiB; + used_vram_MiB = gpus_usages[device_properties.deviceName]; + +#endif + int free_vram_MiB = + total_vram_MiB > used_vram_MiB ? total_vram_MiB - used_vram_MiB : 0; + gpus.emplace_back(cortex::hw::GPU{ + .id = std::to_string(id), + .device_id = device_properties.deviceID, + .name = device_properties.deviceName, + .version = std::to_string(device_properties.driverVersion), + .add_info = cortex::hw::AmdAddInfo{}, + .free_vram = free_vram_MiB, + .total_vram = total_vram_MiB, + .uuid = uuid_to_string(device_id_properties.deviceUUID), + .vendor = GetVendorStr(device_properties.vendorID)}); + id++; + } + + // Clean up + vkDestroyInstance(instance, nullptr); + gpus_ = gpus; #endif - int free_vram_MiB = - total_vram_MiB > used_vram_MiB ? total_vram_MiB - used_vram_MiB : 0; - gpus.emplace_back(cortex::hw::GPU{ - .id = std::to_string(id), - .device_id = device_properties.deviceID, - .name = device_properties.deviceName, - .version = std::to_string(device_properties.driverVersion), - .add_info = cortex::hw::AmdAddInfo{}, - .free_vram = free_vram_MiB, - .total_vram = total_vram_MiB, - .uuid = uuid_to_string(device_id_properties.deviceUUID)}); - id++; + return true; } - // Clean up - vkDestroyInstance(instance, nullptr); - FreeLibrary(vulkan_library); - return gpus; + public: + VulkanGpu(VulkanGpu const&) = delete; + VulkanGpu& operator=(VulkanGpu const&) = delete; + ~VulkanGpu() { +#if defined(_WIN32) || defined(_WIN64) || defined(__linux__) + if (vulkan_library) + FreeLibrary(vulkan_library); #endif -} + } + + static VulkanGpu& GetInstance() { + static VulkanGpu vg; + return vg; + } + + cpp::result, std::string> GetGpuInfoList() { + for (size_t i = 0; i < gpus_.size(); i++) { + int64_t total_vram_MiB = 0; + int64_t used_vram_MiB = 0; + +#if defined(__linux__) + auto gpus_usages = + GetGpuUsage().value_or(std::unordered_map{}); + total_vram_MiB = gpus_usages[gpus_[i].device_id].total_vram_MiB; + used_vram_MiB = gpus_usages[gpus_[i].device_id].used_vram_MiB; +#elif defined(_WIN32) + auto gpus_usages = + GetGpuUsage().value_or(std::unordered_map{}); + total_vram_MiB = gpus_[i].free_vram; + used_vram_MiB = gpus_usages[gpus_[i].name]; +#endif + int free_vram_MiB = + total_vram_MiB > used_vram_MiB ? total_vram_MiB - used_vram_MiB : 0; + gpus_[i].free_vram = free_vram_MiB; + } + + return gpus_; + } +}; } // namespace cortex::hw \ No newline at end of file diff --git a/engine/utils/hardware/gpu_info.h b/engine/utils/hardware/gpu_info.h index 43325bf38..14096d4bb 100644 --- a/engine/utils/hardware/gpu_info.h +++ b/engine/utils/hardware/gpu_info.h @@ -9,7 +9,8 @@ namespace cortex::hw { inline std::vector GetGPUInfo() { auto nvidia_gpus = system_info_utils::GetGpuInfoList(); - auto vulkan_gpus = GetGpuInfoList().value_or(std::vector{}); + auto vulkan_gpus = VulkanGpu::GetInstance().GetGpuInfoList().value_or( + std::vector{}); auto use_vulkan_info = nvidia_gpus.empty(); // In case we have vulkan info, add more information for GPUs @@ -24,10 +25,20 @@ inline std::vector GetGPUInfo() { .compute_cap = nvidia_gpus[i].compute_cap.value_or("unknown")}; vulkan_gpus[j].free_vram = std::stoll(nvidia_gpus[i].vram_free); vulkan_gpus[j].total_vram = std::stoll(nvidia_gpus[i].vram_total); + vulkan_gpus[j].vendor = nvidia_gpus[i].vendor; } } } - + + // Erase invalid GPUs + for (std::vector::iterator it = vulkan_gpus.begin(); + it != vulkan_gpus.end();) { + if ((*it).total_vram <= 0) + it = vulkan_gpus.erase(it); + else + ++it; + } + if (use_vulkan_info) { return vulkan_gpus; } else { @@ -43,7 +54,8 @@ inline std::vector GetGPUInfo() { .compute_cap = n.compute_cap.value_or("unknown")}, .free_vram = std::stoi(n.vram_free), .total_vram = std::stoi(n.vram_total), - .uuid = n.uuid}); + .uuid = n.uuid, + .vendor = n.vendor}); } return res; } diff --git a/engine/utils/system_info_utils.cc b/engine/utils/system_info_utils.cc index 673a2a7b7..1448a4b36 100644 --- a/engine/utils/system_info_utils.cc +++ b/engine/utils/system_info_utils.cc @@ -124,17 +124,16 @@ std::vector GetGpuInfoList() { while ( std::regex_search(search_start, output.cend(), match, gpu_info_reg)) { - GpuInfo gpuInfo = { - match[1].str(), // id - match[2].str(), // vram_total - match[3].str(), // vram_free - match[4].str(), // name - GetGpuArch(match[4].str()), // arch - driver_version, // driver_version - cuda_version, // cuda_driver_version - need_fallback ? "0" : match[5].str(), // compute_cap - match[rg_count].str() // uuid - }; + GpuInfo gpuInfo = {match[1].str(), // id + match[2].str(), // vram_total + match[3].str(), // vram_free + match[4].str(), // name + GetGpuArch(match[4].str()), // arch + driver_version, // driver_version + cuda_version, // cuda_driver_version + need_fallback ? "0" : match[5].str(), // compute_cap + match[rg_count].str(), // uuid + "NVIDIA"}; gpuInfoList.push_back(gpuInfo); search_start = match.suffix().first; } diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h index 79d5db2e1..54eaed8c9 100644 --- a/engine/utils/system_info_utils.h +++ b/engine/utils/system_info_utils.h @@ -121,6 +121,7 @@ struct GpuInfo { std::optional cuda_driver_version; std::optional compute_cap; std::string uuid; + std::string vendor; }; std::vector GetGpuInfoListVulkan();