From 97beceba558c4e329e13d7fab65cf4999fac75e3 Mon Sep 17 00:00:00 2001
From: Neil Mehta
Date: Tue, 4 Mar 2025 15:03:16 -0500
Subject: [PATCH] CUDA: Fix new mma detection for Turing cards with Volta PTX

---
 ggml/src/ggml-cuda/common.cuh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
index 1832314ec133b..6e91457c5224f 100644
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -244,7 +244,8 @@ static bool fp16_mma_hardware_available(const int cc) {
 
 // Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.
 static bool new_mma_available(const int cc) {
-    return cc < GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_TURING;
+    return cc < GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA &&
+        cc >= GGML_CUDA_CC_TURING;
 }
 
 static bool cp_async_available(const int cc) {