Skip to content

Commit

Permalink
Support latest cuda toolkits and fix a shfl.up.sync bug (#178)
Browse files Browse the repository at this point in the history
* Add sm_90 to cmake

* Remove sm_35 for CUDA 12.0 and later

* Fix incorrect args of shfl.up.sync
  • Loading branch information
zlsh80826 committed Nov 30, 2022
1 parent 83ab865 commit bdc2b45
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
10 changes: 9 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,11 @@ endif()
IF (CUDA_VERSION VERSION_LESS "11.0")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_30,code=sm_30")
ENDIF()
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_35,code=sm_35")

# sm_35 (Kepler) support was removed in CUDA 12.0, so only target it on older toolkits
IF (CUDA_VERSION VERSION_LESS "12.0")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_35,code=sm_35")
ENDIF()

set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_50,code=sm_50")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_52,code=sm_52")
Expand All @@ -94,6 +98,10 @@ IF ((CUDA_VERSION VERSION_GREATER "11.2") OR (CUDA_VERSION VERSION_EQUAL "11.2")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_86,code=sm_86")
ENDIF()

IF ((CUDA_VERSION VERSION_GREATER "11.8") OR (CUDA_VERSION VERSION_EQUAL "11.8"))
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_90,code=sm_90")
ENDIF()

IF(NOT APPLE AND NOT WIN32)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --std=c++11")
if(WITH_OMP)
Expand Down
4 changes: 2 additions & 2 deletions include/contrib/moderngpu/include/device/intrinsics.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ MGPU_DEVICE int shfl_add(int x, int offset, int width = WARP_SIZE) {
asm(
"{.reg .s32 r0;"
".reg .pred p;"
"shfl.up.sync.b32 r0|p, %1, %2, %3, %4;"
"shfl.up.sync.b32 r0|p, %1, %2, %3, 0xFFFFFFFF;"
"@p add.s32 r0, r0, %4;"
"mov.s32 %0, r0; }"
: "=r"(result) : "r"(x), "r"(offset), "r"(mask), "r"(x));
Expand All @@ -175,7 +175,7 @@ MGPU_DEVICE int shfl_max(int x, int offset, int width = WARP_SIZE) {
asm(
"{.reg .s32 r0;"
".reg .pred p;"
"shfl.up.sync.b32 r0|p, %1, %2, %3, %4;"
"shfl.up.sync.b32 r0|p, %1, %2, %3, 0xFFFFFFFF;"
"@p max.s32 r0, r0, %4;"
"mov.s32 %0, r0; }"
: "=r"(result) : "r"(x), "r"(offset), "r"(mask), "r"(x));
Expand Down

0 comments on commit bdc2b45

Please sign in to comment.