Skip to content

Commit ec176ee

Browse files
fix kernel selection logic
1 parent 3e1ca0c commit ec176ee

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml/src/ggml-cuda/fattn.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -306,7 +306,7 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
306 306   while (gqa_ratio % (2*gqa_ratio_eff) == 0 && gqa_ratio_eff < ncols2_max) {
307 307       gqa_ratio_eff *= 2;
308 308   }
309     - if (Q->ne[1] * gqa_ratio_eff <= 2) {
    309 + if (can_use_vector_kernel && Q->ne[1] * gqa_ratio_eff <= 2) {
310 310       return BEST_FATTN_KERNEL_VEC;
311 311   }
312 312   if (Q->ne[1] * gqa_ratio_eff <= 16) {

0 commit comments

Comments (0)