Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions engine/cli/commands/engine_install_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ bool EngineInstallCmd::Exec(const std::string& engine,
dp.Connect(host_, port_);
// engine can be small, so need to start ws first
auto dp_res = std::async(std::launch::deferred, [&dp] {
bool need_cuda_download = !system_info_utils::GetCudaVersion().empty();
bool need_cuda_download =
!system_info_utils::GetDriverAndCudaVersion().second.empty();
if (need_cuda_download) {
return dp.Handle({DownloadType::Engine, DownloadType::CudaToolkit});
} else {
Expand Down Expand Up @@ -149,7 +150,8 @@ bool EngineInstallCmd::Exec(const std::string& engine,
dp.Connect(host_, port_);
// engine can be small, so need to start ws first
auto dp_res = std::async(std::launch::deferred, [&dp] {
bool need_cuda_download = !system_info_utils::GetCudaVersion().empty();
bool need_cuda_download =
!system_info_utils::GetDriverAndCudaVersion().second.empty();
if (need_cuda_download) {
return dp.Handle({DownloadType::Engine, DownloadType::CudaToolkit});
} else {
Expand Down
3 changes: 2 additions & 1 deletion engine/cli/commands/engine_install_cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ class EngineInstallCmd {
port_(port),
show_menu_(show_menu),
hw_inf_{.sys_inf = system_info_utils::GetSystemInfo(),
.cuda_driver_version = system_info_utils::GetCudaVersion()} {};
.cuda_driver_version =
system_info_utils::GetDriverAndCudaVersion().second} {};

bool Exec(const std::string& engine, const std::string& version = "latest",
const std::string& src = "");
Expand Down
3 changes: 2 additions & 1 deletion engine/cli/commands/engine_update_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ bool EngineUpdateCmd::Exec(const std::string& host, int port,
dp.Connect(host, port);
// engine can be small, so need to start ws first
auto dp_res = std::async(std::launch::deferred, [&dp] {
bool need_cuda_download = !system_info_utils::GetCudaVersion().empty();
bool need_cuda_download =
!system_info_utils::GetDriverAndCudaVersion().second.empty();
if (need_cuda_download) {
return dp.Handle({DownloadType::Engine, DownloadType::CudaToolkit});
} else {
Expand Down
2 changes: 1 addition & 1 deletion engine/cli/commands/server_start_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace commands {

namespace {
bool TryConnectToServer(const std::string& host, int port) {
constexpr const auto kMaxRetry = 3u;
constexpr const auto kMaxRetry = 4u;
auto count = 0u;
// Check if server is started
while (true) {
Expand Down
3 changes: 2 additions & 1 deletion engine/services/engine_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ class EngineService : public EngineServiceI {
explicit EngineService(std::shared_ptr<DownloadService> download_service)
: download_service_{download_service},
hw_inf_{.sys_inf = system_info_utils::GetSystemInfo(),
.cuda_driver_version = system_info_utils::GetCudaVersion()} {}
.cuda_driver_version =
system_info_utils::GetDriverAndCudaVersion().second} {}

std::vector<EngineInfo> GetEngineInfoList() const;

Expand Down
4 changes: 2 additions & 2 deletions engine/services/hardware_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ namespace services {

namespace {
bool TryConnectToServer(const std::string& host, int port) {
constexpr const auto kMaxRetry = 3u;
constexpr const auto kMaxRetry = 4u;
auto count = 0u;
// Check if server is started
while (true) {
Expand Down Expand Up @@ -292,7 +292,7 @@ void HardwareService::UpdateHardwareInfos() {
}

#if defined(_WIN32) || defined(_WIN64) || defined(__linux__)
if (system_info_utils::IsNvidiaSmiAvailable()) {
if (!gpus.empty()) {
const char* value = std::getenv("CUDA_VISIBLE_DEVICES");
if (value) {
LOG_INFO << "CUDA_VISIBLE_DEVICES: " << value;
Expand Down
3 changes: 1 addition & 2 deletions engine/utils/hardware/gpu_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,11 @@ inline std::vector<GPU> GetGPUInfo() {
// Only support for nvidia for now
// auto gpus = hwinfo::getAllGPUs();
auto nvidia_gpus = system_info_utils::GetGpuInfoList();
auto cuda_version = system_info_utils::GetCudaVersion();
for (auto& n : nvidia_gpus) {
res.emplace_back(
GPU{.id = n.id,
.name = n.name,
.version = cuda_version,
.version = nvidia_gpus[0].cuda_driver_version.value_or("unknown"),
.add_info =
NvidiaAddInfo{
.driver_version = n.driver_version.value_or("unknown"),
Expand Down
54 changes: 22 additions & 32 deletions engine/utils/system_info_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ constexpr static auto kUnsupported{"Unsupported"};
constexpr static auto kCudaVersionRegex{R"(CUDA Version:\s*([\d\.]+))"};
constexpr static auto kDriverVersionRegex{R"(Driver Version:\s*(\d+\.\d+))"};
constexpr static auto kGpuQueryCommand{
"nvidia-smi --query-gpu=index,memory.total,memory.free,name,compute_cap,uuid "
"nvidia-smi "
"--query-gpu=index,memory.total,memory.free,name,compute_cap,uuid "
"--format=csv,noheader,nounits"};
constexpr static auto kGpuInfoRegex{
R"((\d+),\s*(\d+),\s*(\d+),\s*([^,]+),\s*([\d\.]+),\s*([^\n,]+))"};
Expand Down Expand Up @@ -100,53 +101,42 @@ inline bool IsNvidiaSmiAvailable() {
#endif
}

inline std::string GetDriverVersion() {
inline std::pair<std::string, std::string> GetDriverAndCudaVersion() {
if (!IsNvidiaSmiAvailable()) {
CTL_INF("nvidia-smi is not available!");
return "";
return {};
}
try {
std::string driver_version;
std::string cuda_version;
CommandExecutor cmd("nvidia-smi");
auto output = cmd.execute();

const std::regex driver_version_reg(kDriverVersionRegex);
std::smatch match;
std::smatch driver_match;

if (std::regex_search(output, match, driver_version_reg)) {
LOG_INFO << "Gpu Driver Version: " << match[1].str();
return match[1].str();
if (std::regex_search(output, driver_match, driver_version_reg)) {
LOG_INFO << "Gpu Driver Version: " << driver_match[1].str();
driver_version = driver_match[1].str();
} else {
LOG_ERROR << "Gpu Driver not found!";
return "";
return {};
}
} catch (const std::exception& e) {
LOG_ERROR << "Error: " << e.what();
return "";
}
}

inline std::string GetCudaVersion() {
if (!IsNvidiaSmiAvailable()) {
CTL_INF("nvidia-smi is not available!");
return "";
}
try {
CommandExecutor cmd("nvidia-smi");
auto output = cmd.execute();

const std::regex cuda_version_reg(kCudaVersionRegex);
std::smatch match;
std::smatch cuda_match;

if (std::regex_search(output, match, cuda_version_reg)) {
LOG_INFO << "CUDA Version: " << match[1].str();
return match[1].str();
if (std::regex_search(output, cuda_match, cuda_version_reg)) {
LOG_INFO << "CUDA Version: " << cuda_match[1].str();
cuda_version = cuda_match[1].str();
} else {
LOG_ERROR << "CUDA Version not found!";
return "";
return {};
}
return std::pair(driver_version, cuda_version);
} catch (const std::exception& e) {
LOG_ERROR << "Error: " << e.what();
return "";
return {};
}
}

Expand Down Expand Up @@ -227,9 +217,9 @@ inline std::vector<GpuInfo> GetGpuInfoList() {
if (!IsNvidiaSmiAvailable())
return gpuInfoList;
try {
// TODO: improve by parsing both in one command execution
auto driver_version = GetDriverVersion();
auto cuda_version = GetCudaVersion();
auto [driver_version, cuda_version] = GetDriverAndCudaVersion();
if (driver_version.empty() || cuda_version.empty())
return gpuInfoList;

CommandExecutor cmd(kGpuQueryCommand);
auto output = cmd.execute();
Expand All @@ -249,7 +239,7 @@ inline std::vector<GpuInfo> GetGpuInfoList() {
driver_version, // driver_version
cuda_version, // cuda_driver_version
match[5].str(), // compute_cap
match[6].str() // uuid
match[6].str() // uuid
};
gpuInfoList.push_back(gpuInfo);
search_start = match.suffix().first;
Expand Down