From 04ba790f82958db1301ae9e16215b199a5ead6eb Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 6 Sep 2025 03:28:32 +0000
Subject: [PATCH 1/3] Initial plan

From fcad8b6a7a3ac5cfb0a4c7c21fa973a1f7de2485 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 6 Sep 2025 03:36:52 +0000
Subject: [PATCH 2/3] Fix installation error with better fallback and error messages

Co-authored-by: LoserCheems <124847097+LoserCheems@users.noreply.github.com>
---
 flash_dmattn/__init__.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/flash_dmattn/__init__.py b/flash_dmattn/__init__.py
index 825d590..0389d8c 100644
--- a/flash_dmattn/__init__.py
+++ b/flash_dmattn/__init__.py
@@ -69,10 +69,15 @@
 ]
 
 
+def _is_cuda_fully_available():
+    """Check if CUDA backend is fully available (both module and functions)."""
+    return CUDA_AVAILABLE and flash_dmattn_func is not None
+
+
 def get_available_backends():
     """Return a list of available backends."""
     backends = []
-    if CUDA_AVAILABLE:
+    if _is_cuda_fully_available():
         backends.append("cuda")
     if TRITON_AVAILABLE:
         backends.append("triton")
@@ -94,21 +99,28 @@ def flash_dmattn_func_auto(backend: Optional[str] = None, **kwargs):
         The attention function for the specified or auto-selected backend.
     """
 
     if backend is None:
-        # Auto-select backend
-        if CUDA_AVAILABLE:
+        # Auto-select backend - use the first fully working backend
+        if _is_cuda_fully_available():
             backend = "cuda"
         elif TRITON_AVAILABLE:
             backend = "triton"
         elif FLEX_AVAILABLE:
             backend = "flex"
         else:
-            raise RuntimeError("No flash attention backend is available. Please install at least one of: triton, transformers, or build the CUDA extension.")
+            # Provide helpful error message based on what's partially available
+            error_parts = ["No flash attention backend is fully available."]
+            if CUDA_AVAILABLE and flash_dmattn_func is None:
+                error_parts.append("CUDA extension was found but interface functions are not available - please rebuild the CUDA extension with: pip install -e .")
+            else:
+                error_parts.append("CUDA extension is not built - please install with: pip install -e .")
+            error_parts.append("Alternatively, install alternative backends: pip install triton (for Triton backend) or pip install transformers (for Flex backend).")
+            raise RuntimeError(" ".join(error_parts))
 
     if backend == "cuda":
         if not CUDA_AVAILABLE:
-            raise RuntimeError("CUDA backend is not available. Please build the CUDA extension.")
+            raise RuntimeError("CUDA backend is not available. Please build the CUDA extension with: pip install -e .")
         if flash_dmattn_func is None:
-            raise RuntimeError("CUDA flash_dmattn_func is not available. Please check the installation.")
+            raise RuntimeError("CUDA extension was found but interface functions are not available. This may indicate an incomplete installation. Please rebuild the CUDA extension with: pip install -e .")
         return flash_dmattn_func
     elif backend == "triton":

From c45b4cc3cff7826933a24e59a9c01889c0791b4f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 6 Sep 2025 03:38:55 +0000
Subject: [PATCH 3/3] Update documentation with better usage patterns and troubleshooting

Co-authored-by: LoserCheems <124847097+LoserCheems@users.noreply.github.com>
---
 README.md | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 2bbef0a..4eaf0c9 100644
--- a/README.md
+++ b/README.md
@@ -99,8 +99,8 @@ if seq_len > keep_window_size:
     attention_mask.zero_()
     attention_mask.scatter(-1, topk_indices, 1.0)
 
-# Select backend
-flash_dmattn_func = flash_dmattn_func_auto(backend="cuda")
+# Select backend (auto-selects the best available backend)
+flash_dmattn_func = flash_dmattn_func_auto()  # Automatically chooses CUDA, Triton, or Flex
 
 # Run Flash Dynamic Mask Attention
 output = flash_dmattn_func(
@@ -232,12 +232,40 @@ python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')"
 ```python
 # Test basic import
 try:
-    from flash_dmattn import flash_dmattn_func, get_available_backends
+    from flash_dmattn import flash_dmattn_func_auto, get_available_backends
     print("✅ Flash Dynamic Mask Attention imported successfully")
     print(f"Available backends: {get_available_backends()}")
+
+    # Test auto backend selection (recommended)
+    func = flash_dmattn_func_auto()
+    print("✅ Auto backend selection works")
 except ImportError as e:
     print(f"❌ Import failed: {e}")
     print("Please ensure the package is properly installed with: pip install -e .")
+except RuntimeError as e:
+    print(f"❌ Backend error: {e}")
+    # The error message will provide specific guidance on what to install
+```
+
+**CUDA Extension Issues**
+```python
+# If you get "CUDA flash_dmattn_func is not available" error:
+# This usually means the CUDA extension was partially installed
+
+# Solution 1: Rebuild the CUDA extension
+# pip install -e . --force-reinstall
+
+# Solution 2: Use alternative backends
+# pip install triton  # For Triton backend
+# pip install transformers  # For Flex backend
+
+# Test with fallback backends
+from flash_dmattn import flash_dmattn_func_auto
+try:
+    func = flash_dmattn_func_auto()  # Will auto-select working backend
+    print("✅ Using backend:", func.__name__)
+except RuntimeError as e:
+    print(f"❌ No backends available: {e}")
 ```
 
 **Performance Issues**
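
For reviewers, a minimal caller-side sketch of the fallback flow these patches introduce. `flash_dmattn_func_auto` and `get_available_backends` are taken from the diff above; the surrounding script structure is an illustrative assumption, not part of the patches.

```python
# Minimal sketch of caller-side behavior after PATCH 2/3 is applied.
# flash_dmattn_func_auto and get_available_backends come from the diff
# above; everything else here is illustrative.
from flash_dmattn import flash_dmattn_func_auto, get_available_backends

# Only fully working backends are reported, so "cuda" no longer appears
# when the extension is built but its interface functions failed to import.
print(f"Available backends: {get_available_backends()}")

try:
    # With backend=None, the first fully working backend is selected,
    # in priority order: CUDA, then Triton, then Flex.
    attn_func = flash_dmattn_func_auto()
except RuntimeError as e:
    # The improved message distinguishes a partially installed CUDA
    # extension from a missing one and names the exact remedy
    # (rebuild with pip install -e ., or install triton/transformers).
    print(f"No usable backend: {e}")
else:
    print(f"Selected implementation: {attn_func.__name__}")
```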