diff --git a/cmd/gpu_plugin/gpu_plugin.go b/cmd/gpu_plugin/gpu_plugin.go index 463171323..be6ab0055 100644 --- a/cmd/gpu_plugin/gpu_plugin.go +++ b/cmd/gpu_plugin/gpu_plugin.go @@ -55,6 +55,7 @@ const ( type cliOptions struct { preferredAllocationPolicy string + resourcePrefix string sharedDevNum int enableMonitoring bool resourceManagement bool @@ -176,7 +177,7 @@ func newDevicePlugin(sysfsDir, devfsDir string, options cliOptions) *devicePlugi if options.resourceManagement { var err error - dp.resMan, err = rm.NewResourceManager(monitorID, namespace+"/"+deviceType) + dp.resMan, err = rm.NewResourceManager(monitorID, namespace+"/"+options.resourcePrefix+deviceType) if err != nil { klog.Errorf("Failed to create resource manager: %+v", err) return nil @@ -330,7 +331,8 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { } if dp.options.enableMonitoring { - klog.V(4).Infof("Adding %s to GPU %s/%s", devPath, monitorType, monitorID) + klog.V(4).Infof("Adding %s to GPU resource %s%s/%s", devPath, + dp.options.resourcePrefix, monitorType, monitorID) monitor = append(monitor, devSpec) } @@ -343,7 +345,7 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { devID := fmt.Sprintf("%s-%d", f.Name(), i) // Currently only one device type (i915) is supported. // TODO: check model ID to differentiate device models. 
- devTree.AddDevice(deviceType, devID, deviceInfo) + devTree.AddDevice(dp.options.resourcePrefix+deviceType, devID, deviceInfo) rmDevInfos[devID] = rm.NewDeviceInfo(nodes, nil, nil) } @@ -352,7 +354,7 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { // all Intel GPUs are under single monitoring resource if len(monitor) > 0 { deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, monitor, nil, nil, nil) - devTree.AddDevice(monitorType, monitorID, deviceInfo) + devTree.AddDevice(dp.options.resourcePrefix+monitorType, monitorID, deviceInfo) } if dp.resMan != nil { @@ -371,10 +373,10 @@ func (dp *devicePlugin) Allocate(request *pluginapi.AllocateRequest) (*pluginapi } func main() { - var prefix string + var faked string var opts cliOptions - flag.StringVar(&prefix, "fake-mode", "", "Prefix for devfs & sysfs paths") + flag.StringVar(&faked, "fake-mode", "", "Comma separated prefix for devfs & sysfs paths + prefix for resources names") flag.BoolVar(&opts.enableMonitoring, "enable-monitoring", false, "whether to enable 'i915_monitoring' (= all GPUs) resource") flag.BoolVar(&opts.resourceManagement, "resource-manager", false, "fractional GPU resource management") flag.IntVar(&opts.sharedDevNum, "shared-dev-num", 1, "number of containers sharing the same GPU device") @@ -400,12 +402,21 @@ func main() { klog.V(1).Infof("GPU device plugin started with %s preferred allocation policy", opts.preferredAllocationPolicy) var sysfs, devfs string - if prefix != "" { - sysfs = prefix + sysfsDrmDirectory - devfs = prefix + devfsDriDirectory + if faked != "" { + prefixes := strings.Split(faked, ",") + if len(prefixes) != 2 { + klog.Fatalf("%d commas in fake-mode option value, not one", len(prefixes)-1) + } + sysfs = prefixes[0] + sysfsDrmDirectory + devfs = prefixes[0] + devfsDriDirectory + opts.resourcePrefix = prefixes[1] + if opts.resourceManagement && opts.resourcePrefix != "" { + klog.Warning("Resource name prefix breaks resource management as it hard-codes their 
names") + } } else { sysfs = sysfsDrmDirectory devfs = devfsDriDirectory + opts.resourcePrefix = "" } plugin := newDevicePlugin(sysfs, devfs, opts) manager := dpapi.NewManager(namespace, plugin)