From 0d889b876330fe1cab356f88d45846500bbad1fe Mon Sep 17 00:00:00 2001
From: Loser Cheems
Date: Tue, 1 Jul 2025 11:19:15 +0800
Subject: [PATCH 1/2] Temporarily disables Split-KV feature

Forces num_splits to 1 and resets accumulator tensors to avoid memory
overhead while bugs in Split-KV implementation are being fixed.

This ensures stability by bypassing the problematic feature until
issues are resolved.
---
 csrc/flash_api.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/csrc/flash_api.cpp b/csrc/flash_api.cpp
index 2e04840..b845e02 100644
--- a/csrc/flash_api.cpp
+++ b/csrc/flash_api.cpp
@@ -255,6 +255,13 @@ std::tuple set_params_splitkv(
         TORCH_CHECK(params.num_splits <= 128, "num_splits > 128 not supported");
     }
 
+    // Temporarily disable Split-KV, because some bugs are still being fixed.
+    // Regardless of how it is set externally, always set num_splits back to 1.
+    // This is to avoid the extra memory overhead of Split-KV.
+    params.num_splits = 1;
+    softmax_lse_accum.reset();
+    out_accum.reset();
+
     return std::make_tuple(softmax_lse_accum, out_accum);
 }
 
From f25f6ef6b184861836c793e9c569b5f2c410a4d9 Mon Sep 17 00:00:00 2001
From: Loser Cheems
Date: Tue, 1 Jul 2025 11:35:34 +0800
Subject: [PATCH 2/2] Adds issue reference to Split-KV disable comment

Documents the GitHub issue tracking the Split-KV bugs that led to
temporarily disabling the feature.

This provides better context for future developers about why the
functionality is disabled and where to find related discussion.
---
 csrc/flash_api.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/csrc/flash_api.cpp b/csrc/flash_api.cpp
index b845e02..1d948ea 100644
--- a/csrc/flash_api.cpp
+++ b/csrc/flash_api.cpp
@@ -256,6 +256,7 @@ std::tuple set_params_splitkv(
     }
 
     // Temporarily disable Split-KV, because some bugs are still being fixed.
+    // See: https://github.com/SmallDoges/flash-dmattn/issues/47
     // Regardless of how it is set externally, always set num_splits back to 1.
     // This is to avoid the extra memory overhead of Split-KV.
     params.num_splits = 1;