From dd73ce388bdd930665609bf10b0370557f36bad7 Mon Sep 17 00:00:00 2001 From: LoserCheems <3314685395@qq.com> Date: Sat, 20 Sep 2025 20:16:17 +0800 Subject: [PATCH 1/5] Updates architecture matrix to include only relevant architectures for wheel builds --- .github/workflows/publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 38bdce0..2b5bd18 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -52,7 +52,7 @@ jobs: # Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs) # when building without C++11 ABI and using it on nvcr images. cxx11_abi: ["FALSE", "TRUE"] - arch: ["80", "86", "89", "90", "100", "120"] + arch: ["80", "90"] include: - torch-version: "2.9.0.dev20250904" cuda-version: "13.0" From 26bd08de2a3a3966b52827b33ac8542cbb616e36 Mon Sep 17 00:00:00 2001 From: LoserCheems <3314685395@qq.com> Date: Sat, 20 Sep 2025 20:16:50 +0800 Subject: [PATCH 2/5] Updates default CUDA architectures to include only relevant versions for builds --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8c847ac..99b875e 100644 --- a/setup.py +++ b/setup.py @@ -79,7 +79,7 @@ def should_skip_cuda_build(): @functools.lru_cache(maxsize=None) def cuda_archs(): - return os.getenv("FLASH_DMATTN_CUDA_ARCHS", "80;86;89;90;100;120").split(";") + return os.getenv("FLASH_DMATTN_CUDA_ARCHS", "80;90").split(";") def detect_preferred_sm_arch() -> Optional[str]: From 735df584894c3529fe425d0e7bb31bbbfbf701ce Mon Sep 17 00:00:00 2001 From: LoserCheems <3314685395@qq.com> Date: Sat, 20 Sep 2025 20:26:55 +0800 Subject: [PATCH 3/5] Refines build matrix by removing ARM64 OS and updating Python versions for compatibility --- .github/workflows/publish.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 2b5bd18..8acf10d 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -43,8 +43,8 @@ jobs: matrix: # Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the # manylinux docker image, but I haven't figured out how to install CUDA on manylinux. - os: [ubuntu-22.04, ubuntu-22.04-arm64] - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + os: [ubuntu-22.04] + python-version: ["3.9", "3.10", "3.11", "3.12"] torch-version: ["2.5.1", "2.6.0", "2.7.1", "2.8.0"] cuda-version: ["12.9.1"] # We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not. From 4d9cec0f9faddf5d5ba63fcddb2b6caedcf8eae3 Mon Sep 17 00:00:00 2001 From: LoserCheems <3314685395@qq.com> Date: Sat, 20 Sep 2025 20:31:10 +0800 Subject: [PATCH 4/5] Refines Python and Torch version specifications for improved compatibility --- .github/workflows/publish.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 8acf10d..76c50fb 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -44,8 +44,8 @@ jobs: # Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the # manylinux docker image, but I haven't figured out how to install CUDA on manylinux. os: [ubuntu-22.04] - python-version: ["3.9", "3.10", "3.11", "3.12"] - torch-version: ["2.5.1", "2.6.0", "2.7.1", "2.8.0"] + python-version: ["3.9", "3.10", "3.12"] + torch-version: ["2.6.0", "2.7.1", "2.8.0"] cuda-version: ["12.9.1"] # We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not. # Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI. From 2d7f6b658e8ea40f39580b5b5d5dca8533077dfd Mon Sep 17 00:00:00 2001 From: LoserCheems <3314685395@qq.com> Date: Sat, 20 Sep 2025 20:32:57 +0800 Subject: [PATCH 5/5] Updates Python and Torch version specifications for improved compatibility --- .github/workflows/publish.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 76c50fb..8acf10d 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -44,8 +44,8 @@ jobs: # Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the # manylinux docker image, but I haven't figured out how to install CUDA on manylinux. os: [ubuntu-22.04] - python-version: ["3.9", "3.10", "3.12"] - torch-version: ["2.6.0", "2.7.1", "2.8.0"] + python-version: ["3.9", "3.10", "3.11", "3.12"] + torch-version: ["2.5.1", "2.6.0", "2.7.1", "2.8.0"] cuda-version: ["12.9.1"] # We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not. # Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.