diff --git a/pkg/inference/backends/llamacpp/download_windows.go b/pkg/inference/backends/llamacpp/download_windows.go index c60574c71..63a3ee460 100644 --- a/pkg/inference/backends/llamacpp/download_windows.go +++ b/pkg/inference/backends/llamacpp/download_windows.go @@ -15,7 +15,7 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, llamaCppPath, vendoredServerStoragePath string, ) error { nvGPUInfoBin := filepath.Join(vendoredServerStoragePath, "com.docker.nv-gpu-info.exe") - var canUseCUDA11, canUseOpenCL bool + var canUseCUDA11, canUseOpenCL, canUseVulkan bool var err error ShouldUseGPUVariantLock.Lock() defer ShouldUseGPUVariantLock.Unlock() @@ -27,6 +27,17 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, l.status = inference.FormatError(fmt.Sprintf("failed to check CUDA 11 capability: %v", err)) return fmt.Errorf("failed to check CUDA 11 capability: %w", err) } + if !canUseCUDA11 { + // Check for Vulkan-capable GPUs (Intel Arc, AMD, etc.) when CUDA + // is not available. + // TODO: publish a "vulkan" variant of docker/docker-model-backend-llamacpp + // to Docker Hub so this detection selects a Vulkan-optimised build. + canUseVulkan, err = hasVulkan() + if err != nil { + l.status = inference.FormatError(fmt.Sprintf("failed to check Vulkan capability: %v", err)) + return fmt.Errorf("failed to check Vulkan capability: %w", err) + } + } case "arm64": canUseOpenCL, err = hasOpenCL() if err != nil { @@ -39,6 +50,8 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, desiredVariant := "cpu" if canUseCUDA11 { desiredVariant = "cuda" + } else if canUseVulkan { + desiredVariant = "vulkan" } else if canUseOpenCL { desiredVariant = "opencl" } diff --git a/pkg/inference/backends/llamacpp/gpuinfo_windows.go b/pkg/inference/backends/llamacpp/gpuinfo_windows.go index e0bb0f646..be13693a9 100644 --- a/pkg/inference/backends/llamacpp/gpuinfo_windows.go +++ b/pkg/inference/backends/llamacpp/gpuinfo_windows.go @@ -20,6 +20,9 @@ func hasNVIDIAGPU() (bool, error) { return false, err } for _, gpu := range gpus.GraphicsCards { + if gpu.DeviceInfo == nil || gpu.DeviceInfo.Vendor == nil { + continue + } if strings.ToLower(gpu.DeviceInfo.Vendor.Name) == "nvidia" { return true, nil } @@ -65,6 +68,9 @@ func hasSupportedAdrenoGPU() (bool, error) { return false, err } for _, gpu := range gpus.GraphicsCards { + if gpu.DeviceInfo == nil || gpu.DeviceInfo.Product == nil { + continue + } isAdrenoFamily := strings.Contains(gpu.DeviceInfo.Product.Name, "Adreno") || strings.Contains(gpu.DeviceInfo.Product.Name, "Qualcomm") if isAdrenoFamily { @@ -99,14 +105,62 @@ func hasOpenCL() (bool, error) { return true, nil } +// hasVulkanCapableGPU returns true if at least one GPU that is neither +// NVIDIA (handled via CUDA) nor a Qualcomm Adreno (handled via OpenCL) +// is detected. Intel Arc, AMD, and other Vulkan-capable discrete or +// integrated GPUs fall into this category. +func hasVulkanCapableGPU() (bool, error) { + gpus, err := ghw.GPU() + if err != nil { + return false, err + } + for _, gpu := range gpus.GraphicsCards { + if gpu.DeviceInfo == nil || gpu.DeviceInfo.Vendor == nil || gpu.DeviceInfo.Product == nil { + continue + } + vendor := strings.ToLower(gpu.DeviceInfo.Vendor.Name) + product := gpu.DeviceInfo.Product.Name + isNVIDIA := vendor == "nvidia" + isAdreno := strings.Contains(product, "Adreno") || strings.Contains(product, "Qualcomm") + if !isNVIDIA && !isAdreno { + return true, nil + } + } + return false, nil +} + +// hasVulkan returns true when a non-CUDA/non-OpenCL GPU is present AND +// the Vulkan runtime library (vulkan-1.dll) is loadable. This mirrors +// the OpenCL.dll probe used by hasOpenCL. +func hasVulkan() (bool, error) { + capable, err := hasVulkanCapableGPU() + if !capable || err != nil { + return false, err + } + h, err := syscall.LoadLibrary("vulkan-1.dll") + if err != nil { + if errors.Is(err, syscall.ERROR_MOD_NOT_FOUND) { + return false, nil + } + return false, fmt.Errorf("unable to load Vulkan DLL: %w", err) + } + syscall.FreeLibrary(h) + return true, nil +} + func CanUseGPU(ctx context.Context, nvGPUInfoBin string) (bool, error) { // We don't ship com.docker.nv-gpu-info.exe on Windows/ARM64 at the moment, - // so skip the CUDA check there for now. The OpenCL check is portable. + // so skip the CUDA and Vulkan checks there for now. The OpenCL check is portable. if runtime.GOARCH == "amd64" { haveCUDA11GPU, err := hasCUDA11CapableGPU(ctx, nvGPUInfoBin) if haveCUDA11GPU || err != nil { return haveCUDA11GPU, err } + // No CUDA GPU found: check for Vulkan-capable GPUs (Intel Arc, AMD, etc.). + haveVulkan, err := hasVulkan() + if haveVulkan || err != nil { + return haveVulkan, err + } } return hasOpenCL() }