-
Notifications
You must be signed in to change notification settings - Fork 1.9k
/
driver_linux.go
85 lines (78 loc) · 2.69 KB
/
driver_linux.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
package nvml
import (
"github.com/NVIDIA/gpu-monitoring-tools/bindings/go/nvml"
)
// Initialize brings up the NVML library by delegating to nvml.Init and
// returns whatever error that call reports.
//
// NOTE(review): the previous comment stated that this locates the NVML shared
// object and loads it dynamically; that happens (if at all) inside the nvml
// bindings and is not visible from this file.
func (n *nvmlDriver) Initialize() error {
	initErr := nvml.Init()
	return initErr
}
// Shutdown ends the session with the NVML library by delegating to
// nvml.Shutdown and returns the error reported by that call, if any.
func (n *nvmlDriver) Shutdown() error {
	shutdownErr := nvml.Shutdown()
	return shutdownErr
}
// SystemDriverVersion reports the driver version string obtained from
// nvml.GetDriverVersion, passing its result and error through unchanged.
func (n *nvmlDriver) SystemDriverVersion() (string, error) {
	version, err := nvml.GetDriverVersion()
	return version, err
}
// DeviceCount reports how many GPU devices nvml.GetDeviceCount sees, passing
// its result and error through unchanged.
func (n *nvmlDriver) DeviceCount() (uint, error) {
	count, err := nvml.GetDeviceCount()
	return count, err
}
// DeviceInfoByIndex builds a DeviceInfo for the GPU at the given position in
// the system device list.
//
// The device handle comes from nvml.NewDevice and its display/persistence
// modes from GetDeviceMode. If either lookup fails, the error is returned
// as-is together with a nil DeviceInfo.
func (n *nvmlDriver) DeviceInfoByIndex(index uint) (*DeviceInfo, error) {
	gpu, err := nvml.NewDevice(index)
	if err != nil {
		return nil, err
	}

	mode, err := gpu.GetDeviceMode()
	if err != nil {
		return nil, err
	}

	// Attributes carried directly on the device handle.
	info := &DeviceInfo{
		UUID:               gpu.UUID,
		Name:               gpu.Model,
		MemoryMiB:          gpu.Memory,
		PowerW:             gpu.Power,
		BAR1MiB:            gpu.PCI.BAR1,
		PCIBandwidthMBPerS: gpu.PCI.Bandwidth,
		PCIBusID:           gpu.PCI.BusID,
		CoresClockMHz:      gpu.Clocks.Cores,
		MemoryClockMHz:     gpu.Clocks.Memory,
	}

	// Attributes derived from the separately queried device mode.
	info.DisplayState = mode.DisplayInfo.Mode.String()
	info.PersistenceMode = mode.Persistence.String()

	return info, nil
}
// DeviceInfoAndStatusByIndex returns both the DeviceInfo and the current
// DeviceStatus for the GPU at the given position in the system device list.
//
// The device handle comes from nvml.NewDevice and the live readings from
// device.Status. If either call fails, the error is returned as-is and both
// result pointers are nil.
//
// Unlike DeviceInfoByIndex, this method does not query the device mode, so the
// DisplayState and PersistenceMode fields of the returned DeviceInfo are left
// at their zero values.
func (n *nvmlDriver) DeviceInfoAndStatusByIndex(index uint) (*DeviceInfo, *DeviceStatus, error) {
	device, err := nvml.NewDevice(index)
	if err != nil {
		return nil, nil, err
	}

	status, err := device.Status()
	if err != nil {
		return nil, nil, err
	}

	// Attributes carried directly on the device handle.
	info := &DeviceInfo{
		UUID:               device.UUID,
		Name:               device.Model,
		MemoryMiB:          device.Memory,
		PowerW:             device.Power,
		BAR1MiB:            device.PCI.BAR1,
		PCIBandwidthMBPerS: device.PCI.Bandwidth,
		PCIBusID:           device.PCI.BusID,
		CoresClockMHz:      device.Clocks.Cores,
		MemoryClockMHz:     device.Clocks.Memory,
	}

	// Readings taken from the status snapshot.
	stats := &DeviceStatus{
		TemperatureC:       status.Temperature,
		GPUUtilization:     status.Utilization.GPU,
		MemoryUtilization:  status.Utilization.Memory,
		EncoderUtilization: status.Utilization.Encoder,
		DecoderUtilization: status.Utilization.Decoder,
		UsedMemoryMiB:      status.Memory.Global.Used,
		ECCErrorsL1Cache:   status.Memory.ECCErrors.L1Cache,
		ECCErrorsL2Cache:   status.Memory.ECCErrors.L2Cache,
		ECCErrorsDevice:    status.Memory.ECCErrors.Device,
		PowerUsageW:        status.Power,
		BAR1UsedMiB:        status.PCI.BAR1Used,
	}

	return info, stats, nil
}