From 6590b793c2b0693dd4299e1d482bf4b327d43692 Mon Sep 17 00:00:00 2001
From: Loser Cheems
Date: Fri, 29 Aug 2025 16:25:09 +0800
Subject: [PATCH] Changes attention backend from flex to cuda

Switches the flash attention backend parameter from "flex" to "cuda" to improve performance and compatibility with CUDA-enabled hardware acceleration.
---
 examples/modeling/modeling_doge.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/modeling/modeling_doge.py b/examples/modeling/modeling_doge.py
index e67cca2..804e145 100644
--- a/examples/modeling/modeling_doge.py
+++ b/examples/modeling/modeling_doge.py
@@ -304,7 +304,7 @@ def forward(
             attention_mask=attention_mask,
         )
 
-        attention_interface: Callable = flash_dmattn_func_auto(backend="flex")
+        attention_interface: Callable = flash_dmattn_func_auto(backend="cuda")
         query_states = query_states.transpose(1, 2).contiguous()  # [B, H, Q_LEN, D]
         key_states = key_states.transpose(1, 2).contiguous()  # [B, H, KV_LEN, D]
         value_states = value_states.transpose(1, 2).contiguous()  # [B, H, KV_LEN, D]