Extend the wheel build matrix and the default CUDA arch list.

* .github/workflows/publish.yml: add an arm64 runner to the `os` matrix and
  add "100" and "120" to the `arch` matrix.
* setup.py: make "80;90;100;120" the default for FLASH_DMATTN_CUDA_ARCHS.

The arm64 runner label is `ubuntu-22.04-arm` (the GitHub-hosted arm64 label);
`ubuntu-22.04-arm64` is not a hosted-runner label, and jobs targeting an
unknown label are never picked up.

NOTE(review): context-line indentation was reconstructed from a
whitespace-collapsed copy of this patch -- confirm it against the target
files. The post-image blob id on the publish.yml `index` line predates the
runner-label correction; regenerate the patch if exact ids matter.

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 8acf10d..0650d02 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -43,7 +43,7 @@ jobs:
       matrix:
         # Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the
         # manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
-        os: [ubuntu-22.04]
+        os: [ubuntu-22.04, ubuntu-22.04-arm]
         python-version: ["3.9", "3.10", "3.11", "3.12"]
         torch-version: ["2.5.1", "2.6.0", "2.7.1", "2.8.0"]
         cuda-version: ["12.9.1"]
@@ -52,7 +52,7 @@ jobs:
         # Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
         # when building without C++11 ABI and using it on nvcr images.
         cxx11_abi: ["FALSE", "TRUE"]
-        arch: ["80", "90"]
+        arch: ["80", "90", "100", "120"]
         include:
           - torch-version: "2.9.0.dev20250904"
             cuda-version: "13.0"
diff --git a/setup.py b/setup.py
index 99b875e..95015cd 100644
--- a/setup.py
+++ b/setup.py
@@ -79,7 +79,7 @@ def should_skip_cuda_build():
 
 @functools.lru_cache(maxsize=None)
 def cuda_archs():
-    return os.getenv("FLASH_DMATTN_CUDA_ARCHS", "80;90").split(";")
+    return os.getenv("FLASH_DMATTN_CUDA_ARCHS", "80;90;100;120").split(";")
 
 
 def detect_preferred_sm_arch() -> Optional[str]: