Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions pkg/collector/libvirt.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,10 +235,13 @@ func NewLibvirtCollector(logger *slog.Logger) (Collector, error) {
err = gpuSMI.Discover()
if err != nil {
// If we failed to fetch GPUs that are from supported
// vendor, return with error
// vendor, DO NOT return with error.
// Seems like we can run into cases where hypervisors
// do not have GPU drivers installed when they use
// passthrough. In that case we cannot get GPUs on the
// hypervisor, so we should not block the exporter from
// starting.
logger.Error("Error fetching GPU devices", "err", err)

return nil, err
}

// Check if vGPU is activated on at least one GPU
Expand Down
5 changes: 4 additions & 1 deletion pkg/collector/libvirt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ func TestNewLibvirtCollector(t *testing.T) {
"--collector.perf.hardware-events",
"--collector.rdma.stats",
"--collector.gpu.type", "nvidia",
"--collector.gpu.nvidia-smi-path", "testdata/nvidia-smi",
// This simulates GPU device detection that fails
// to find any GPU devices. The collector should
// initialise correctly in that case.
// "--collector.gpu.nvidia-smi-path", "testdata/nvidia-smi",
"--collector.cgroups.force-version", "v2",
},
)
Expand Down