From 3065ed1ad996855b28b66eb2a1ee2e301dc2525d Mon Sep 17 00:00:00 2001
From: LoserCheems <3314685395@qq.com>
Date: Sat, 20 Sep 2025 20:36:17 +0800
Subject: [PATCH 1/2] Expands build matrix to include ARM64 OS and additional architectures for improved compatibility

---
 .github/workflows/publish.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 8acf10d..0650d02 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -43,7 +43,7 @@ jobs:
       matrix:
           # Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the
           # manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
-          os: [ubuntu-22.04]
+          os: [ubuntu-22.04, ubuntu-22.04-arm64]
           python-version: ["3.9", "3.10", "3.11", "3.12"]
           torch-version: ["2.5.1", "2.6.0", "2.7.1", "2.8.0"]
           cuda-version: ["12.9.1"]
@@ -52,7 +52,7 @@ jobs:
           # Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
           # when building without C++11 ABI and using it on nvcr images.
           cxx11_abi: ["FALSE", "TRUE"]
-          arch: ["80", "90"]
+          arch: ["80", "90", "100", "120"]
           include:
             - torch-version: "2.9.0.dev20250904"
               cuda-version: "13.0"

From e78456e409ab8e49d8165951328bb4344879a2f3 Mon Sep 17 00:00:00 2001
From: LoserCheems <3314685395@qq.com>
Date: Sat, 20 Sep 2025 20:36:29 +0800
Subject: [PATCH 2/2] Updates default CUDA architectures to include additional versions for improved compatibility

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 99b875e..95015cd 100644
--- a/setup.py
+++ b/setup.py
@@ -79,7 +79,7 @@ def should_skip_cuda_build():
 
 @functools.lru_cache(maxsize=None)
 def cuda_archs():
-    return os.getenv("FLASH_DMATTN_CUDA_ARCHS", "80;90").split(";")
+    return os.getenv("FLASH_DMATTN_CUDA_ARCHS", "80;90;100;120").split(";")
 
 
 def detect_preferred_sm_arch() -> Optional[str]: