Add clang as CUDA compiler
* Rename all jobs containing cudaXX to nvccXX
* Add a new job using clang-16 as CUDA compiler with CUDA 12.1, built with
  C++20 (to avoid boost::atomic_ref)
* Apply some fixes to the LLAMA code
* Add a CMake workaround for libstdc++ 12
bernhardmgruber committed Mar 21, 2023
1 parent 95883a4 commit 9c2b229
Showing 9 changed files with 66 additions and 29 deletions.
46 changes: 29 additions & 17 deletions .github/workflows/ci.yaml
@@ -151,6 +151,7 @@ jobs:
runs-on: ${{ matrix.os || 'ubuntu-22.04' }}
env:
CXX: ${{ matrix.cxx }}
CUDACXX: ${{ matrix.cudacxx }}
name: ${{ matrix.name }}
strategy:
fail-fast: false
@@ -160,37 +161,37 @@ jobs:
cxx: g++-9
- name: build-ubuntu-gcc10
cxx: g++-10
- name: build-ubuntu-gcc10-cuda11.2
- name: build-ubuntu-gcc10-nvcc11.2
cxx: g++-10
cuda_url: https://developer.download.nvidia.com/compute/cuda/11.2.2/local_installers/cuda_11.2.2_460.32.03_linux.run
- name: build-ubuntu-gcc10-cuda11.3
- name: build-ubuntu-gcc10-nvcc11.3
cxx: g++-10
cuda_url: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run
- name: build-ubuntu-gcc10-cuda11.4
- name: build-ubuntu-gcc10-nvcc11.4
cxx: g++-10
cuda_url: https://developer.download.nvidia.com/compute/cuda/11.4.4/local_installers/cuda_11.4.4_470.82.01_linux.run
- name: build-ubuntu-gcc10-cuda11.5
- name: build-ubuntu-gcc10-nvcc11.5
cxx: g++-10
cuda_url: https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda_11.5.2_495.29.05_linux.run
- name: build-ubuntu-gcc10-cuda11.6
- name: build-ubuntu-gcc10-nvcc11.6
cxx: g++-10
cuda_url: https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda_11.6.2_510.47.03_linux.run
- name: build-ubuntu-gcc11
cxx: g++-11
- name: build-ubuntu-gcc11-cuda11.7
- name: build-ubuntu-gcc11-nvcc11.7
cxx: g++-11
cuda_url: https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda_11.7.1_515.65.01_linux.run
- name: build-ubuntu-gcc11-cuda11.8
- name: build-ubuntu-gcc11-nvcc11.8
cxx: g++-11
cuda_url: https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
- name: build-ubuntu-gcc12
cxx: g++-12
install_extra: g++-12
- name: build-ubuntu-gcc12-cuda12.0
- name: build-ubuntu-gcc12-nvcc12.0
cxx: g++-12
install_extra: g++-12
cuda_url: https://developer.download.nvidia.com/compute/cuda/12.0.1/local_installers/cuda_12.0.1_525.85.12_linux.run
- name: build-ubuntu-gcc12-cuda12.1
- name: build-ubuntu-gcc12-nvcc12.1
cxx: g++-12
install_extra: g++-12
cuda_url: https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run
@@ -219,6 +220,13 @@ jobs:
cxx: clang++-16
install_extra: clang-16 libomp-16-dev
add_llvm_repo: true
- name: build-ubuntu-clang16-cuda12.1
cxx: clang++-16
cudacxx: clang++-16
install_extra: clang-16 libomp-16-dev
cuda_url: https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run
add_llvm_repo: true
cxx_std: 20
- name: build-ubuntu-icpx
cxx: icpx
install_oneapi: true
@@ -279,16 +287,19 @@ jobs:
mkdir build
cd build
CUDACXX=(`echo /usr/local/cuda-*/bin/nvcc`)
if [ ! -f $CUDACXX ]; then
unset CUDACXX
# try to find nvcc if no CUDACXX is provided
if [ -z "$CUDACXX" ]; then
CUDACXX=(`echo /usr/local/cuda-*/bin/nvcc`)
if [ ! -f $CUDACXX ]; then
unset CUDACXX
fi
fi
echo "nvcc is here: $CUDACXX"
echo "CUDACXX is here: $CUDACXX"
NVHPC_FLAGS=
CXX_FLAGS=${{ matrix.cxx_flags }}
if [ ${{ matrix.add_nvcpp_repo }} ]; then
# cmake (in some versions) passes some flags that nvc++ does not understand
NVHPC_FLAGS='-noswitcherror'
CXX_FLAGS+=" -noswitcherror"
fi
cmake .. -DBUILD_TESTING=ON \
@@ -298,10 +309,11 @@
-Dalpaka_ACC_CPU_B_SEQ_T_SEQ_ENABLE=${{ !matrix.cuda_url }} \
-Dalpaka_ACC_CPU_DISABLE_ATOMIC_REF=ON \
-Dalpaka_ACC_GPU_CUDA_ENABLE=${{ !!matrix.cuda_url }} \
-Dalpaka_CXX_STANDARD=17 \
-Dalpaka_CXX_STANDARD=${{ matrix.cxx_std || '17' }} \
-DCMAKE_CUDA_COMPILER=$CUDACXX \
-DCMAKE_CUDA_HOST_COMPILER=$CXX \
-DCMAKE_CXX_FLAGS=$NVHPC_FLAGS \
-DCMAKE_CXX_FLAGS="$CXX_FLAGS" \
-DCMAKE_CUDA_FLAGS="${{ matrix.cuda_flags }}" \
-DCMAKE_TOOLCHAIN_FILE=$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake
- name: build tests + examples
run: |
12 changes: 12 additions & 0 deletions CMakeLists.txt
@@ -43,6 +43,18 @@ check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
enable_language(CUDA)
set(CMAKE_CUDA_ARCHITECTURES "35" CACHE STRING "CUDA architectures to compile for")

if (CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
target_compile_definitions(${PROJECT_NAME} INTERFACE -DFMT_USE_FLOAT128=0)

# Workaround for clang as CUDA compiler with libstdc++ 12
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/clang_cuda_libstdc++12_workaround.hpp"
"#include <__clang_cuda_runtime_wrapper.h>\n"
"#if defined(__clang__) && defined(__CUDA__) && defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE >= 12 && defined(__noinline__)\n"
"# undef __noinline__\n"
"#endif\n")
target_compile_options(${PROJECT_NAME} INTERFACE -include "${CMAKE_CURRENT_BINARY_DIR}/clang_cuda_libstdc++12_workaround.hpp")
endif()
else()
message(WARNING "Could not find CUDA. Try setting CMAKE_CUDA_COMPILER. CUDA tests and examples are disabled.")
endif()
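The injected header works around a known clash between CUDA's host-side macros and libstdc++ 12: the CUDA headers (pulled in through __clang_cuda_runtime_wrapper.h) define __noinline__ as a macro, while libstdc++ 12 spells the GCC attribute with exactly that reserved name, so the macro mangles the attribute. A minimal sketch of the clash, simplified from the real headers:

#define __noinline__ __attribute__((noinline)) // roughly what the CUDA headers do

// libstdc++ 12 uses the reserved spelling of the attribute; the macro expands
// it to the invalid __attribute__((__attribute__((noinline)))):
__attribute__((__noinline__)) void f() {}

Undefining the macro after the runtime wrapper has been included, as the generated clang_cuda_libstdc++12_workaround.hpp above does, restores the plain attribute spelling.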
11 changes: 8 additions & 3 deletions examples/cuda/nbody/CMakeLists.txt
@@ -11,7 +11,12 @@ if (NOT TARGET llama::llama)
endif ()
add_executable(${PROJECT_NAME} nbody.cu ../../common/Stopwatch.hpp)
target_compile_features(${PROJECT_NAME} PRIVATE cuda_std_17)
target_compile_options(${PROJECT_NAME} PUBLIC
--expt-relaxed-constexpr --use_fast_math
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:--compiler-options -Wall,-Wextra>)
if (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
target_compile_options(${PROJECT_NAME} PUBLIC
--expt-relaxed-constexpr --use_fast_math
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:--compiler-options -Wall,-Wextra>
)
else ()
target_compile_options(${PROJECT_NAME} PUBLIC -ffast-math -Wall -Wextra)
endif ()
target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama CUDA::cudart fmt::fmt)
9 changes: 7 additions & 2 deletions examples/cuda/pitch/CMakeLists.txt
@@ -19,8 +19,13 @@ if (NOT TARGET llama::llama)
endif ()
add_executable(${PROJECT_NAME} pitch.cu)
target_compile_features(${PROJECT_NAME} PRIVATE cuda_std_17)
target_compile_options(${PROJECT_NAME} PUBLIC
if (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
target_compile_options(${PROJECT_NAME} PUBLIC
--expt-relaxed-constexpr --use_fast_math
--compiler-options -Wall,-Wextra)
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:--compiler-options -Wall,-Wextra>
)
else ()
target_compile_options(${PROJECT_NAME} PUBLIC -ffast-math -Wall -Wextra)
endif ()
target_include_directories(${PROJECT_NAME} SYSTEM PRIVATE ../../../thirdparty/stb/include)
target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama CUDA::cudart fmt::fmt)
3 changes: 3 additions & 0 deletions examples/viewcopy/CMakeLists.txt
@@ -31,3 +31,6 @@ else()
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20)
target_compile_options(${PROJECT_NAME} PRIVATE -march=native)
endif()
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
target_compile_options(${PROJECT_NAME} PRIVATE -fbracket-depth=1500)
endif()
2 changes: 1 addition & 1 deletion include/llama/Tuple.hpp
@@ -111,7 +111,7 @@ namespace llama
};

template<typename... Elements>
Tuple(Elements...) -> Tuple<std::remove_cv_t<std::remove_reference_t<Elements>>...>;
LLAMA_HOST_ACC Tuple(Elements...)->Tuple<std::remove_cv_t<std::remove_reference_t<Elements>>...>;

template<std::size_t I, typename... Elements>
LLAMA_FN_HOST_ACC_INLINE constexpr auto get(Tuple<Elements...>& tuple) -> auto&
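The deduction guide now carries the same host/device annotation as the rest of the API (LLAMA_HOST_ACC presumably expands to __host__ __device__ when compiling as CUDA); without it, clang as CUDA compiler apparently rejects class template argument deduction in device code. A hypothetical kernel that exercises the guide:

#include <llama/Tuple.hpp>

__global__ void kernel()
{
    llama::Tuple t{1, 2.0f}; // CTAD via the guide deduces llama::Tuple<int, float>
}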
4 changes: 2 additions & 2 deletions include/llama/llama.hpp
@@ -31,7 +31,7 @@

// suppress warnings on missing return statements. we get a lot of these because nvcc/nvc++ have some troubles with if
// constexpr.
#ifdef __CUDACC__
#ifdef __NVCC__
# ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
# pragma nv_diag_suppress 940
# else
@@ -76,7 +76,7 @@
#include "mapping/Split.hpp"
#include "mapping/tree/Mapping.hpp"

#if defined(__CUDACC__)
#if defined(__NVCC__)
# ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
# pragma nv_diag_default 940
# else
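The switch from __CUDACC__ to __NVCC__ is central to supporting clang as CUDA compiler: __CUDACC__ is defined whenever a translation unit is compiled as CUDA, by nvcc and by clang alike, whereas __NVCC__ identifies nvcc itself, the only compiler that understands the nv_diag_* pragmas. A sketch of the distinction (the real code additionally checks __NVCC_DIAG_PRAGMA_SUPPORT__ for older nvcc versions):

#if defined(__CUDACC__)
// compiling CUDA code: true under nvcc *and* under clang in CUDA mode
#endif
#if defined(__NVCC__)
// nvcc specifically: the only safe place for nvcc-only pragmas
#    pragma nv_diag_suppress 940
#endif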
4 changes: 2 additions & 2 deletions include/llama/macros.hpp
@@ -124,7 +124,7 @@
#endif

#ifndef LLAMA_BEGIN_SUPPRESS_HOST_DEVICE_WARNING
# ifdef __CUDACC__
# ifdef __NVCC__
# ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
# define LLAMA_BEGIN_SUPPRESS_HOST_DEVICE_WARNING \
_Pragma("nv_diag_suppress 20011") _Pragma("nv_diag_suppress 20014")
@@ -142,7 +142,7 @@
# endif
#endif
#ifndef LLAMA_END_SUPPRESS_HOST_DEVICE_WARNING
# ifdef __CUDACC__
# ifdef __NVCC__
# ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
# define LLAMA_END_SUPPRESS_HOST_DEVICE_WARNING \
_Pragma("nv_diag_default 20011") _Pragma("nv_diag_default 20014")
4 changes: 2 additions & 2 deletions include/llama/mapping/ChangeType.hpp
@@ -14,12 +14,12 @@ namespace llama::mapping
template<typename UserT, typename StoredT>
struct ChangeTypeProjection
{
static auto load(StoredT v) -> UserT
LLAMA_FN_HOST_ACC_INLINE static auto load(StoredT v) -> UserT
{
return static_cast<UserT>(v); // we could allow stronger casts here
}

static auto store(UserT v) -> StoredT
LLAMA_FN_HOST_ACC_INLINE static auto store(UserT v) -> StoredT
{
return static_cast<StoredT>(v); // we could allow stronger casts here
}
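load and store end up being called from LLAMA's __host__ __device__ accessors and hence from device code; nvcc merely warns about such calls in some situations (the 20011/20014 diagnostics suppressed in macros.hpp above), while clang as CUDA compiler treats them as hard errors once code is generated for the device. A reduced, standalone sketch of why the annotation is needed (assumption: LLAMA_FN_HOST_ACC_INLINE expands to __host__ __device__ plus an inlining hint):

struct Plain
{
    static auto load(int v) -> float { return static_cast<float>(v); } // implicitly host-only
};

struct Annotated
{
    __host__ __device__ static auto load(int v) -> float { return static_cast<float>(v); }
};

__global__ void kernel(float* out)
{
    // out[0] = Plain::load(1);  // error: host function called from device code
    out[0] = Annotated::load(1); // fine: callable on host and device
}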
