Skip to content

Commit

Permalink
Drop Clang+CUDA workaround
Browse files (browse the repository at this point in the history)
  • Loading branch information
dalg24 committed Nov 3, 2023
1 parent 6fc7a49 commit 1e1ed13
Show file tree
Hide file tree
Showing 4 changed files with 0 additions and 12 deletions.
4 changes: 0 additions & 4 deletions Makefile.kokkos
Original file line number | Diff line number | Diff line change
Expand Up @@ -687,10 +687,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
endif
endif

ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND")
endif

ifeq ($(KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC), 0)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC")
else
Expand Down
1 change: 0 additions & 1 deletion cmake/KokkosCore_config.h.in
Original file line number | Diff line number | Diff line change
Expand Up @@ -59,7 +59,6 @@
#cmakedefine KOKKOS_ENABLE_HBWSPACE
#cmakedefine KOKKOS_ENABLE_LIBDL
#cmakedefine KOKKOS_ENABLE_LIBQUADMATH
#cmakedefine KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
#cmakedefine KOKKOS_ENABLE_ONEDPL

#cmakedefine KOKKOS_ARCH_SSE42
Expand Down
3 changes: 0 additions & 3 deletions cmake/kokkos_arch.cmake
Original file line number | Diff line number | Diff line change
Expand Up @@ -191,9 +191,6 @@ IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
ELSEIF(CUDAToolkit_BIN_DIR)
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS --cuda-path=${CUDAToolkit_BIN_DIR}/..)
ENDIF()
IF (KOKKOS_ENABLE_CUDA)
SET(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND ON CACHE BOOL "enable CUDA Clang workarounds" FORCE)
ENDIF()
ELSEIF (KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
SET(CUDA_ARCH_FLAG "-gpu")
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -cuda)
Expand Down
4 changes: 0 additions & 4 deletions core/unit_test/TestTeamVector.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -1012,7 +1012,6 @@ struct checkScan {
};
} // namespace VectorScanReducer

#if !defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
TEST(TEST_CATEGORY, team_vector) {
ASSERT_TRUE((TestTeamVector::Test<TEST_EXECSPACE>(0)));
ASSERT_TRUE((TestTeamVector::Test<TEST_EXECSPACE>(1)));
Expand All @@ -1028,9 +1027,7 @@ TEST(TEST_CATEGORY, team_vector) {
ASSERT_TRUE((TestTeamVector::Test<TEST_EXECSPACE>(11)));
ASSERT_TRUE((TestTeamVector::Test<TEST_EXECSPACE>(12)));
}
#endif

#if !defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
TEST(TEST_CATEGORY, triple_nested_parallelism) {
// With KOKKOS_ENABLE_DEBUG enabled, the functor uses too many registers to run
// with a team size of 32 on GPUs, 16 is the max possible (at least on a K80
Expand All @@ -1055,7 +1052,6 @@ TEST(TEST_CATEGORY, triple_nested_parallelism) {
TestTripleNestedReduce<double, TEST_EXECSPACE>(8192, 2048, 16, 16);
TestTripleNestedReduce<double, TEST_EXECSPACE>(8192, 2048, 7, 16);
}
#endif

TEST(TEST_CATEGORY, parallel_scan_with_reducers) {
using T = double;
Expand Down

0 comments on commit 1e1ed13

Please sign in to comment.