Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion pkg/inference/backends/llamacpp/download_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger,
llamaCppPath, vendoredServerStoragePath string,
) error {
nvGPUInfoBin := filepath.Join(vendoredServerStoragePath, "com.docker.nv-gpu-info.exe")
var canUseCUDA11, canUseOpenCL bool
var canUseCUDA11, canUseOpenCL, canUseVulkan bool
var err error
ShouldUseGPUVariantLock.Lock()
defer ShouldUseGPUVariantLock.Unlock()
Expand All @@ -27,6 +27,17 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger,
l.status = inference.FormatError(fmt.Sprintf("failed to check CUDA 11 capability: %v", err))
return fmt.Errorf("failed to check CUDA 11 capability: %w", err)
}
if !canUseCUDA11 {
// Check for Vulkan-capable GPUs (Intel Arc, AMD, etc.) when CUDA
// is not available.
// TODO: publish a "vulkan" variant of docker/docker-model-backend-llamacpp
// to Docker Hub so this detection selects a Vulkan-optimised build.
canUseVulkan, err = hasVulkan()
if err != nil {
l.status = inference.FormatError(fmt.Sprintf("failed to check Vulkan capability: %v", err))
return fmt.Errorf("failed to check Vulkan capability: %w", err)
}
}
case "arm64":
canUseOpenCL, err = hasOpenCL()
if err != nil {
Expand All @@ -39,6 +50,8 @@ func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger,
desiredVariant := "cpu"
if canUseCUDA11 {
desiredVariant = "cuda"
} else if canUseVulkan {
desiredVariant = "vulkan"
} else if canUseOpenCL {
desiredVariant = "opencl"
}
Expand Down
56 changes: 55 additions & 1 deletion pkg/inference/backends/llamacpp/gpuinfo_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ func hasNVIDIAGPU() (bool, error) {
return false, err
}
for _, gpu := range gpus.GraphicsCards {
if gpu.DeviceInfo == nil || gpu.DeviceInfo.Vendor == nil {
continue
}
if strings.ToLower(gpu.DeviceInfo.Vendor.Name) == "nvidia" {
return true, nil
}
Expand Down Expand Up @@ -65,6 +68,9 @@ func hasSupportedAdrenoGPU() (bool, error) {
return false, err
}
for _, gpu := range gpus.GraphicsCards {
if gpu.DeviceInfo == nil || gpu.DeviceInfo.Product == nil {
continue
}
isAdrenoFamily := strings.Contains(gpu.DeviceInfo.Product.Name, "Adreno") ||
strings.Contains(gpu.DeviceInfo.Product.Name, "Qualcomm")
if isAdrenoFamily {
Expand Down Expand Up @@ -99,14 +105,62 @@ func hasOpenCL() (bool, error) {
return true, nil
}

// hasVulkanCapableGPU returns true if at least one GPU that is neither
// NVIDIA (handled via CUDA) nor a Qualcomm Adreno (handled via OpenCL)
// is detected. Intel Arc, AMD, and other Vulkan-capable discrete or
// integrated GPUs fall into this category.
func hasVulkanCapableGPU() (bool, error) {
gpus, err := ghw.GPU()
if err != nil {
return false, err
}
for _, gpu := range gpus.GraphicsCards {
if gpu.DeviceInfo == nil || gpu.DeviceInfo.Vendor == nil || gpu.DeviceInfo.Product == nil {
continue
}
vendor := strings.ToLower(gpu.DeviceInfo.Vendor.Name)
product := gpu.DeviceInfo.Product.Name
Comment thread
sourcery-ai[bot] marked this conversation as resolved.
Comment thread
darthcav marked this conversation as resolved.
isNVIDIA := vendor == "nvidia"
isAdreno := strings.Contains(product, "Adreno") || strings.Contains(product, "Qualcomm")
if !isNVIDIA && !isAdreno {
return true, nil
}
}
return false, nil
}

// hasVulkan reports whether Vulkan can be used: a GPU accepted by
// hasVulkanCapableGPU must be present AND the Vulkan runtime library
// (vulkan-1.dll) must be loadable. A missing DLL is not an error and yields
// (false, nil); any other load failure is returned wrapped. This mirrors
// the OpenCL.dll probe used by hasOpenCL.
func hasVulkan() (bool, error) {
	gpuFound, err := hasVulkanCapableGPU()
	if err != nil {
		return false, err
	}
	if !gpuFound {
		return false, nil
	}

	handle, loadErr := syscall.LoadLibrary("vulkan-1.dll")
	switch {
	case loadErr == nil:
		// The library is loaded only as a probe, so release it right away.
		// A failure to unload does not change the answer and is ignored.
		_ = syscall.FreeLibrary(handle)
		return true, nil
	case errors.Is(loadErr, syscall.ERROR_MOD_NOT_FOUND):
		// No Vulkan runtime installed: report "not available", not an error.
		return false, nil
	default:
		return false, fmt.Errorf("unable to load Vulkan DLL: %w", loadErr)
	}
}

func CanUseGPU(ctx context.Context, nvGPUInfoBin string) (bool, error) {
// We don't ship com.docker.nv-gpu-info.exe on Windows/ARM64 at the moment,
// so skip the CUDA check there for now. The OpenCL check is portable.
// so skip the CUDA and Vulkan checks there for now. The OpenCL check is portable.
if runtime.GOARCH == "amd64" {
haveCUDA11GPU, err := hasCUDA11CapableGPU(ctx, nvGPUInfoBin)
if haveCUDA11GPU || err != nil {
return haveCUDA11GPU, err
}
// No CUDA GPU found: check for Vulkan-capable GPUs (Intel Arc, AMD, etc.).
haveVulkan, err := hasVulkan()
if haveVulkan || err != nil {
return haveVulkan, err
}
}
return hasOpenCL()
}
Loading