diff --git a/pkg/collector/libvirt.go b/pkg/collector/libvirt.go index 4e627f5a..9672cff3 100644 --- a/pkg/collector/libvirt.go +++ b/pkg/collector/libvirt.go @@ -235,10 +235,13 @@ func NewLibvirtCollector(logger *slog.Logger) (Collector, error) { err = gpuSMI.Discover() if err != nil { // If we failed to fetch GPUs that are from supported - // vendor, return with error + // vendor, DO NOT return with error. + // Seems like we can run into cases where hypervisors + // do not have GPU drivers installed when they use + // passthrough. In that case we cannot get GPUs on the + // hypervisor, so we should not block the exporter from + // starting. logger.Error("Error fetching GPU devices", "err", err) - - return nil, err } // Check if vGPU is activated on atleast one GPU diff --git a/pkg/collector/libvirt_test.go b/pkg/collector/libvirt_test.go index 4c056049..eeaeb818 100644 --- a/pkg/collector/libvirt_test.go +++ b/pkg/collector/libvirt_test.go @@ -30,7 +30,10 @@ func TestNewLibvirtCollector(t *testing.T) { "--collector.perf.hardware-events", "--collector.rdma.stats", "--collector.gpu.type", "nvidia", - "--collector.gpu.nvidia-smi-path", "testdata/nvidia-smi", + // This is to simulate GPU device detection but + // fail to find GPU devices. The collector should + // initialise correctly in that case. + // "--collector.gpu.nvidia-smi-path", "testdata/nvidia-smi", "--collector.cgroups.force-version", "v2", }, )