From 6590b793c2b0693dd4299e1d482bf4b327d43692 Mon Sep 17 00:00:00 2001
From: Loser Cheems <losercheems@gmail.com>
Date: Fri, 29 Aug 2025 16:25:09 +0800
Subject: [PATCH] Changes attention backend from flex to cuda

Switches the `backend` argument passed to `flash_dmattn_func_auto` in examples/modeling/modeling_doge.py from "flex" to "cuda" to improve performance and compatibility on CUDA-enabled hardware.
---
 examples/modeling/modeling_doge.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/modeling/modeling_doge.py b/examples/modeling/modeling_doge.py
index e67cca2..804e145 100644
--- a/examples/modeling/modeling_doge.py
+++ b/examples/modeling/modeling_doge.py
@@ -304,7 +304,7 @@ def forward(
             attention_mask=attention_mask,
         )
 
-        attention_interface: Callable = flash_dmattn_func_auto(backend="flex")
+        attention_interface: Callable = flash_dmattn_func_auto(backend="cuda")
         query_states = query_states.transpose(1, 2).contiguous()    # [B, H, Q_LEN, D]
         key_states = key_states.transpose(1, 2).contiguous()        # [B, H, KV_LEN, D]
         value_states = value_states.transpose(1, 2).contiguous()    # [B, H, KV_LEN, D]