From 866e279592bbbaca569146c2d1c814b35a7d1c28 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Fri, 14 Nov 2025 14:44:35 +0100 Subject: [PATCH 01/17] test(windows): move back to original cuda-toolkit setup gha --- .github/workflows/build_kernel_windows.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index 24a36e65..d07854bb 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -12,9 +12,11 @@ jobs: strategy: matrix: os: [ windows-2022 ] - python: [ '3.12', '3.13' ] + python: [ '3.11', '3.12', '3.13' ] torch: [ - { version: '2.8', cuda: '12.9.1', wheel: '129' } + { version: '2.9.1', cuda: '12.6.3', wheel: '126' } + { version: '2.9.1', cuda: '12.8.1', wheel: '128' } + { version: '2.9.1', cuda: '13.0.2', wheel: '130' } ] name: Build kernel @@ -32,7 +34,7 @@ jobs: - uses: actions/checkout@v5 # CUDA environment setup - - uses: N-Storm/cuda-toolkit@v0.2.28 + - uses: Jimver/cuda-toolkit@v0.2.29 id: setup-cuda-toolkit with: cuda: ${{ matrix.torch.cuda }} # TODO(mfuntowicz): How can we test multiple CUDA versions than align with torch? 
From ab319e8d38f2fbf401b83319c6540c7eaa6704a2 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Fri, 14 Nov 2025 14:53:34 +0100 Subject: [PATCH 02/17] test(windows): disable cuda13 for now --- .github/workflows/build_kernel_windows.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index d07854bb..4d208647 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -16,7 +16,7 @@ jobs: torch: [ { version: '2.9.1', cuda: '12.6.3', wheel: '126' } { version: '2.9.1', cuda: '12.8.1', wheel: '128' } - { version: '2.9.1', cuda: '13.0.2', wheel: '130' } +# { version: '2.9.1', cuda: '13.0.2', wheel: '130' } ] name: Build kernel From f496481329e5f40e88bab54cfc243712f47a82d2 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Fri, 14 Nov 2025 15:43:59 +0100 Subject: [PATCH 03/17] test(windows): py312 for now --- .github/workflows/build_kernel_windows.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index 4d208647..7ea4b01a 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -12,7 +12,7 @@ jobs: strategy: matrix: os: [ windows-2022 ] - python: [ '3.11', '3.12', '3.13' ] + python: [ '3.12' ] torch: [ { version: '2.9.1', cuda: '12.6.3', wheel: '126' } { version: '2.9.1', cuda: '12.8.1', wheel: '128' } From 9d5735f66d2a64eb1ebf6df90247cffe164b96f2 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Fri, 14 Nov 2025 15:55:37 +0100 Subject: [PATCH 04/17] test(windows): force the branch to run wf --- .github/workflows/build_kernel_windows.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index 7ea4b01a..33b35631 100644 --- 
a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -1,7 +1,7 @@ name: "Build and test kernel - Windows" on: push: - branches: [main] + branches: [main, build-and-release-ci-win] pull_request: branches: [main] types: [opened, synchronize, reopened] # trigger on PRs @@ -58,7 +58,7 @@ jobs: cache: 'pip' - name: Install PyTorch - run: pip install torch --index-url https://download.pytorch.org/whl/cu129 + run: pip install torch --index-url https://download.pytorch.org/whl/${{ matrix.torch.wheel }} - name: Build activation kernel run: ( scripts\windows\builder.ps1 -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force ) From 07c4ea0a2b5a55a6f0b28e3e388206c53c004acb Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Fri, 14 Nov 2025 16:01:32 +0100 Subject: [PATCH 05/17] test(windows): uh? --- .github/workflows/build_kernel_windows.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index 33b35631..5e64a418 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -12,7 +12,7 @@ jobs: strategy: matrix: os: [ windows-2022 ] - python: [ '3.12' ] + python: [ 3.12 ] torch: [ { version: '2.9.1', cuda: '12.6.3', wheel: '126' } { version: '2.9.1', cuda: '12.8.1', wheel: '128' } From 111d322627c7976fe8ab3bfe09e26b2c95d6343c Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Fri, 14 Nov 2025 16:03:39 +0100 Subject: [PATCH 06/17] test(windows): uh? 
--- .github/workflows/build_kernel_windows.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index 5e64a418..f42d9676 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -14,9 +14,8 @@ jobs: os: [ windows-2022 ] python: [ 3.12 ] torch: [ - { version: '2.9.1', cuda: '12.6.3', wheel: '126' } + { version: '2.9.1', cuda: '12.6.3', wheel: '126' }, { version: '2.9.1', cuda: '12.8.1', wheel: '128' } -# { version: '2.9.1', cuda: '13.0.2', wheel: '130' } ] name: Build kernel From 061f164d222ead66da3e91003489ef23546c8257 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Fri, 14 Nov 2025 16:39:39 +0100 Subject: [PATCH 07/17] test(windows): missing `cu` prefix in pytorch index-url --- .github/workflows/build_kernel_windows.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index f42d9676..a8b185d2 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -57,7 +57,7 @@ jobs: cache: 'pip' - name: Install PyTorch - run: pip install torch --index-url https://download.pytorch.org/whl/${{ matrix.torch.wheel }} + run: pip install torch --index-url https://download.pytorch.org/whl/cu${{ matrix.torch.wheel }} - name: Build activation kernel run: ( scripts\windows\builder.ps1 -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force ) From 66e57a371a853f6d4c3f79da9b603f279133298e Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Fri, 14 Nov 2025 17:44:23 +0100 Subject: [PATCH 08/17] test(windows): remove activation example test --- .github/workflows/build_kernel_windows.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index 
a8b185d2..ced0ca08 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -59,11 +59,6 @@ jobs: - name: Install PyTorch run: pip install torch --index-url https://download.pytorch.org/whl/cu${{ matrix.torch.wheel }} - - name: Build activation kernel - run: ( scripts\windows\builder.ps1 -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force ) -# - name: Copy activation kernel -# run: cp -rL examples/activation/build activation-kernel - - name: Build cutlass GEMM kernel run: ( scripts\windows\builder.ps1 -SourceFolder examples/cutlass-gemm -BuildConfig Release -Backend cuda -Build -Force ) # - name: Copy cutlass GEMM kernel From b4fc5fdd05ac6c8270460777273391dbf510d7bd Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 17 Nov 2025 10:42:59 +0100 Subject: [PATCH 09/17] test(windows): reintroduce cuda13 --- .github/workflows/build_kernel_windows.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index ced0ca08..0340b360 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -16,6 +16,7 @@ jobs: torch: [ { version: '2.9.1', cuda: '12.6.3', wheel: '126' }, { version: '2.9.1', cuda: '12.8.1', wheel: '128' } + { version: '2.9.1', cuda: '13.0.1', wheel: '130' } ] name: Build kernel From 8ab2ef927daefa0fa798ad00a47e13ed991476b7 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 17 Nov 2025 10:45:30 +0100 Subject: [PATCH 10/17] test(windows): missing coma --- .github/workflows/build_kernel_windows.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index 0340b360..13c9e0f7 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -15,7 +15,7 @@ jobs: python: [ 3.12 ] torch: 
[ { version: '2.9.1', cuda: '12.6.3', wheel: '126' }, - { version: '2.9.1', cuda: '12.8.1', wheel: '128' } + { version: '2.9.1', cuda: '12.8.1', wheel: '128' }, { version: '2.9.1', cuda: '13.0.1', wheel: '130' } ] From 283d32e9d9618cf33b9411b14ecef23d3e578900 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 17 Nov 2025 11:01:48 +0100 Subject: [PATCH 11/17] test(windows): disable branch name in push dispatch --- .github/workflows/build_kernel_windows.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index 13c9e0f7..8acdf21b 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -1,7 +1,7 @@ name: "Build and test kernel - Windows" on: push: - branches: [main, build-and-release-ci-win] + branches: [main] pull_request: branches: [main] types: [opened, synchronize, reopened] # trigger on PRs From 06192361be1c83498541f6d6df4fdf792b266afc Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 17 Nov 2025 15:38:39 +0100 Subject: [PATCH 12/17] test(windows): disable cutlass for now to test the others --- .github/workflows/build_kernel_windows.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index 8acdf21b..f9d10927 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -14,9 +14,9 @@ jobs: os: [ windows-2022 ] python: [ 3.12 ] torch: [ - { version: '2.9.1', cuda: '12.6.3', wheel: '126' }, +# { version: '2.9.1', cuda: '12.6.3', wheel: '126' }, { version: '2.9.1', cuda: '12.8.1', wheel: '128' }, - { version: '2.9.1', cuda: '13.0.1', wheel: '130' } +# { version: '2.9.1', cuda: '13.0.1', wheel: '130' } ] name: Build kernel @@ -60,8 +60,8 @@ jobs: - name: Install PyTorch run: pip install torch --index-url 
https://download.pytorch.org/whl/cu${{ matrix.torch.wheel }} - - name: Build cutlass GEMM kernel - run: ( scripts\windows\builder.ps1 -SourceFolder examples/cutlass-gemm -BuildConfig Release -Backend cuda -Build -Force ) +# - name: Build cutlass GEMM kernel +# run: ( scripts\windows\builder.ps1 -SourceFolder examples/cutlass-gemm -BuildConfig Release -Backend cuda -Build -Force ) # - name: Copy cutlass GEMM kernel # run: cp -rL examples/cutlass-gemm/result cutlass-gemm-kernel From be0709488644422fab9dc7616b6685e845a43e6c Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Wed, 19 Nov 2025 10:38:22 +0100 Subject: [PATCH 13/17] fix(windows): force _WIN32 definition to be sure guard on PyTorch are not bypass --- build2cmake/src/templates/windows.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/build2cmake/src/templates/windows.cmake b/build2cmake/src/templates/windows.cmake index 6685f97f..b7991188 100644 --- a/build2cmake/src/templates/windows.cmake +++ b/build2cmake/src/templates/windows.cmake @@ -1,3 +1,6 @@ +# Ensure _WIN32 is always defined on Windows +add_compile_definitions(_WIN32) + # Generate a standardized build variant name following the pattern: # torch---windows # From cdd0f7078829ff5b7a9217f6bb55fe8367598365 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Wed, 19 Nov 2025 11:27:34 +0100 Subject: [PATCH 14/17] fix(windows): remove the typo in preambule and remove duplicated add_compile_definition in windows.cmake --- build2cmake/src/templates/cuda/preamble.cmake | 2 +- build2cmake/src/templates/windows.cmake | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/build2cmake/src/templates/cuda/preamble.cmake b/build2cmake/src/templates/cuda/preamble.cmake index 78cd834c..b2c0328b 100644 --- a/build2cmake/src/templates/cuda/preamble.cmake +++ b/build2cmake/src/templates/cuda/preamble.cmake @@ -107,7 +107,7 @@ include(${CMAKE_CURRENT_LIST_DIR}/cmake/windows.cmake) # This preprocessor macro should be defined in building with 
MSVC but not for CUDA and co. # Also, if not using MVSC, this may not be set too ... # So we explicitly set it to avoid any side effect due to preprocessor-guards not being defined. -add_compile_definitions(_WIN32>) +add_compile_definitions(_WIN32) # Generate standardized build name run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version") diff --git a/build2cmake/src/templates/windows.cmake b/build2cmake/src/templates/windows.cmake index b7991188..6685f97f 100644 --- a/build2cmake/src/templates/windows.cmake +++ b/build2cmake/src/templates/windows.cmake @@ -1,6 +1,3 @@ -# Ensure _WIN32 is always defined on Windows -add_compile_definitions(_WIN32) - # Generate a standardized build variant name following the pattern: # torch---windows # From fcd838a6eeaee69ffdc6bda06c79d62ffa1c1fad Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Wed, 19 Nov 2025 11:48:14 +0100 Subject: [PATCH 15/17] fix(windows): ok the issue is not with _WIN32 but with USE_CUDA=1 which is not set --- build2cmake/src/templates/cuda/preamble.cmake | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/build2cmake/src/templates/cuda/preamble.cmake b/build2cmake/src/templates/cuda/preamble.cmake index b2c0328b..1f709da2 100644 --- a/build2cmake/src/templates/cuda/preamble.cmake +++ b/build2cmake/src/templates/cuda/preamble.cmake @@ -104,10 +104,11 @@ message(STATUS "Rendered for platform {{ platform }}") {% if platform == 'windows' %} include(${CMAKE_CURRENT_LIST_DIR}/cmake/windows.cmake) -# This preprocessor macro should be defined in building with MSVC but not for CUDA and co. -# Also, if not using MVSC, this may not be set too ... -# So we explicitly set it to avoid any side effect due to preprocessor-guards not being defined. 
-add_compile_definitions(_WIN32) +if(GPU_LANG STREQUAL "CUDA") + add_compile_definitions(USE_CUDA=1) +elseif(GPU_LANG STREQUAL "HIP") + add_compile_definitions(USE_ROCM=1) +endif() # Generate standardized build name run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version") From 89605dd750cb22e7877a41cf69fc94f5a5f565c0 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Wed, 19 Nov 2025 14:52:08 +0100 Subject: [PATCH 16/17] fix(windows): readd missing cutlass test --- .github/workflows/build_kernel_windows.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index f9d10927..7f5a99b0 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -60,10 +60,10 @@ jobs: - name: Install PyTorch run: pip install torch --index-url https://download.pytorch.org/whl/cu${{ matrix.torch.wheel }} -# - name: Build cutlass GEMM kernel -# run: ( scripts\windows\builder.ps1 -SourceFolder examples/cutlass-gemm -BuildConfig Release -Backend cuda -Build -Force ) -# - name: Copy cutlass GEMM kernel -# run: cp -rL examples/cutlass-gemm/result cutlass-gemm-kernel + - name: Build cutlass GEMM kernel + run: ( scripts\windows\builder.ps1 -SourceFolder examples/cutlass-gemm -BuildConfig Release -Backend cuda -Build -Force ) + - name: Copy cutlass GEMM kernel + run: cp -rL examples/cutlass-gemm/result cutlass-gemm-kernel - name: Build relu kernel run: ( scripts\windows\builder.ps1 -SourceFolder examples/relu -BuildConfig Release -Backend cuda -Build -Force ) From c8c1cb90bda2da21ad11f970e0f360f28e2d29ab Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Wed, 19 Nov 2025 16:09:21 +0100 Subject: [PATCH 17/17] fix(windows): ok we should not copy cutlass kernels --- .github/workflows/build_kernel_windows.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git
a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index 7f5a99b0..e803f688 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -62,8 +62,8 @@ jobs: - name: Build cutlass GEMM kernel run: ( scripts\windows\builder.ps1 -SourceFolder examples/cutlass-gemm -BuildConfig Release -Backend cuda -Build -Force ) - - name: Copy cutlass GEMM kernel - run: cp -rL examples/cutlass-gemm/result cutlass-gemm-kernel +# - name: Copy cutlass GEMM kernel +# run: cp -rL examples/cutlass-gemm/result cutlass-gemm-kernel - name: Build relu kernel run: ( scripts\windows\builder.ps1 -SourceFolder examples/relu -BuildConfig Release -Backend cuda -Build -Force )