From 97beceba558c4e329e13d7fab65cf4999fac75e3 Mon Sep 17 00:00:00 2001
From: Neil Mehta <neil@lmstudio.ai>
Date: Tue, 4 Mar 2025 15:03:16 -0500
Subject: [PATCH] CUDA: Fix new mma detection for Turing cards with Volta PTX

---
 ggml/src/ggml-cuda/common.cuh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
index 1832314ec133b..6e91457c5224f 100644
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -244,7 +244,8 @@ static bool fp16_mma_hardware_available(const int cc) {
 
 // Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.
 static bool new_mma_available(const int cc) {
-    return cc < GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_TURING;
+    return cc < GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA &&
+        cc >= GGML_CUDA_CC_TURING;
 }
 
 static bool cp_async_available(const int cc) {