Add clang as CUDA compiler
* Rename all jobs containing cudaXX to nvccXX
* Add a new job using clang-16 as CUDA compiler with CUDA 12.1, built with
  C++20 (to avoid boost::atomic_ref)
* Apply some fixes to the LLAMA code
* Add a CMake workaround for libstdc++ 12
bernhardmgruber committed Mar 21, 2023
1 parent 95883a4 commit 9c2b229
Showing 9 changed files with 66 additions and 29 deletions.
46 changes: 29 additions & 17 deletions .github/workflows/ci.yaml
@@ -151,6 +151,7 @@ jobs:
runs-on: ${{ matrix.os || 'ubuntu-22.04' }}
env:
CXX: ${{ matrix.cxx }}
CUDACXX: ${{ matrix.cudacxx }}
name: ${{ matrix.name }}
strategy:
fail-fast: false
@@ -160,37 +161,37 @@ jobs:
cxx: g++-9
- name: build-ubuntu-gcc10
cxx: g++-10
- name: build-ubuntu-gcc10-cuda11.2
- name: build-ubuntu-gcc10-nvcc11.2
cxx: g++-10
cuda_url: https://developer.download.nvidia.com/compute/cuda/11.2.2/local_installers/cuda_11.2.2_460.32.03_linux.run
- name: build-ubuntu-gcc10-cuda11.3
- name: build-ubuntu-gcc10-nvcc11.3
cxx: g++-10
cuda_url: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run
- name: build-ubuntu-gcc10-cuda11.4
- name: build-ubuntu-gcc10-nvcc11.4
cxx: g++-10
cuda_url: https://developer.download.nvidia.com/compute/cuda/11.4.4/local_installers/cuda_11.4.4_470.82.01_linux.run
- name: build-ubuntu-gcc10-cuda11.5
- name: build-ubuntu-gcc10-nvcc11.5
cxx: g++-10
cuda_url: https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda_11.5.2_495.29.05_linux.run
- name: build-ubuntu-gcc10-cuda11.6
- name: build-ubuntu-gcc10-nvcc11.6
cxx: g++-10
cuda_url: https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda_11.6.2_510.47.03_linux.run
- name: build-ubuntu-gcc11
cxx: g++-11
- name: build-ubuntu-gcc11-cuda11.7
- name: build-ubuntu-gcc11-nvcc11.7
cxx: g++-11
cuda_url: https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda_11.7.1_515.65.01_linux.run
- name: build-ubuntu-gcc11-cuda11.8
- name: build-ubuntu-gcc11-nvcc11.8
cxx: g++-11
cuda_url: https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
- name: build-ubuntu-gcc12
cxx: g++-12
install_extra: g++-12
- name: build-ubuntu-gcc12-cuda12.0
- name: build-ubuntu-gcc12-nvcc12.0
cxx: g++-12
install_extra: g++-12
cuda_url: https://developer.download.nvidia.com/compute/cuda/12.0.1/local_installers/cuda_12.0.1_525.85.12_linux.run
- name: build-ubuntu-gcc12-cuda12.1
- name: build-ubuntu-gcc12-nvcc12.1
cxx: g++-12
install_extra: g++-12
cuda_url: https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run
@@ -219,6 +220,13 @@ jobs:
cxx: clang++-16
install_extra: clang-16 libomp-16-dev
add_llvm_repo: true
- name: build-ubuntu-clang16-cuda12.1
cxx: clang++-16
cudacxx: clang++-16
install_extra: clang-16 libomp-16-dev
cuda_url: https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run
add_llvm_repo: true
cxx_std: 20
- name: build-ubuntu-icpx
cxx: icpx
install_oneapi: true
@@ -279,16 +287,19 @@ jobs:
mkdir build
cd build
CUDACXX=(`echo /usr/local/cuda-*/bin/nvcc`)
if [ ! -f $CUDACXX ]; then
unset CUDACXX
# try to find nvcc if no CUDACXX is provided
if [ -z "$CUDACXX" ]; then
CUDACXX=(`echo /usr/local/cuda-*/bin/nvcc`)
if [ ! -f $CUDACXX ]; then
unset CUDACXX
fi
fi
echo "nvcc is here: $CUDACXX"
echo "CUDACXX is here: $CUDACXX"
NVHPC_FLAGS=
CXX_FLAGS=${{ matrix.cxx_flags }}
if [ ${{ matrix.add_nvcpp_repo }} ]; then
# cmake (in some versions) passes some flags that nvc++ does not understand
NVHPC_FLAGS='-noswitcherror'
CXX_FLAGS+=" -noswitcherror"
fi
cmake .. -DBUILD_TESTING=ON \
@@ -298,10 +309,11 @@
-Dalpaka_ACC_CPU_B_SEQ_T_SEQ_ENABLE=${{ !matrix.cuda_url }} \
-Dalpaka_ACC_CPU_DISABLE_ATOMIC_REF=ON \
-Dalpaka_ACC_GPU_CUDA_ENABLE=${{ !!matrix.cuda_url }} \
-Dalpaka_CXX_STANDARD=17 \
-Dalpaka_CXX_STANDARD=${{ matrix.cxx_std || '17' }} \
-DCMAKE_CUDA_COMPILER=$CUDACXX \
-DCMAKE_CUDA_HOST_COMPILER=$CXX \
-DCMAKE_CXX_FLAGS=$NVHPC_FLAGS \
-DCMAKE_CXX_FLAGS="$CXX_FLAGS" \
-DCMAKE_CUDA_FLAGS="${{ matrix.cuda_flags }}" \
-DCMAKE_TOOLCHAIN_FILE=$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake
- name: build tests + examples
run: |
12 changes: 12 additions & 0 deletions CMakeLists.txt
@@ -43,6 +43,18 @@ check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
enable_language(CUDA)
set(CMAKE_CUDA_ARCHITECTURES "35" CACHE STRING "CUDA architectures to compile for")

if (CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
target_compile_definitions(${PROJECT_NAME} INTERFACE -DFMT_USE_FLOAT128=0)

# Workaround for clang as CUDA compiler with libstdc++ 12
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/clang_cuda_libstdc++12_workaround.hpp"
"#include <__clang_cuda_runtime_wrapper.h>\n"
"#if defined(__clang__) && defined(__CUDA__) && defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE >= 12 && defined(__noinline__)\n"
"# undef __noinline__\n"
"#endif\n")
target_compile_options(${PROJECT_NAME} INTERFACE -include "${CMAKE_CURRENT_BINARY_DIR}/clang_cuda_libstdc++12_workaround.hpp")
endif()
else()
message(WARNING "Could not find CUDA. Try setting CMAKE_CUDA_COMPILER. CUDA tests and examples are disabled.")
endif()
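The injected header works around a known clash between CUDA's host-side macros and libstdc++ 12: the CUDA headers (pulled in through __clang_cuda_runtime_wrapper.h) define __noinline__ as a macro, while libstdc++ 12 spells the GCC attribute with exactly that reserved name, so the macro mangles the attribute. A minimal sketch of the clash, simplified from the real headers:

#define __noinline__ __attribute__((noinline)) // roughly what the CUDA headers do

// libstdc++ 12 uses the reserved spelling of the attribute; the macro expands
// it to the invalid __attribute__((__attribute__((noinline)))):
__attribute__((__noinline__)) void f() {}

Undefining the macro after the runtime wrapper has been included, as the generated clang_cuda_libstdc++12_workaround.hpp above does, restores the plain attribute spelling.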
11 changes: 8 additions & 3 deletions examples/cuda/nbody/CMakeLists.txt
@@ -11,7 +11,12 @@ if (NOT TARGET llama::llama)
endif ()
add_executable(${PROJECT_NAME} nbody.cu ../../common/Stopwatch.hpp)
target_compile_features(${PROJECT_NAME} PRIVATE cuda_std_17)
target_compile_options(${PROJECT_NAME} PUBLIC
--expt-relaxed-constexpr --use_fast_math
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:--compiler-options -Wall,-Wextra>)
if (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
target_compile_options(${PROJECT_NAME} PUBLIC
--expt-relaxed-constexpr --use_fast_math
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:--compiler-options -Wall,-Wextra>
)
else ()
target_compile_options(${PROJECT_NAME} PUBLIC -ffast-math -Wall -Wextra)
endif ()
target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama CUDA::cudart fmt::fmt)
9 changes: 7 additions & 2 deletions examples/cuda/pitch/CMakeLists.txt
@@ -19,8 +19,13 @@ if (NOT TARGET llama::llama)
endif ()
add_executable(${PROJECT_NAME} pitch.cu)
target_compile_features(${PROJECT_NAME} PRIVATE cuda_std_17)
target_compile_options(${PROJECT_NAME} PUBLIC
if (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
target_compile_options(${PROJECT_NAME} PUBLIC
--expt-relaxed-constexpr --use_fast_math
--compiler-options -Wall,-Wextra)
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:--compiler-options -Wall,-Wextra>
)
else ()
target_compile_options(${PROJECT_NAME} PUBLIC -ffast-math -Wall -Wextra)
endif ()
target_include_directories(${PROJECT_NAME} SYSTEM PRIVATE ../../../thirdparty/stb/include)
target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama CUDA::cudart fmt::fmt)
3 changes: 3 additions & 0 deletions examples/viewcopy/CMakeLists.txt
@@ -31,3 +31,6 @@ else()
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20)
target_compile_options(${PROJECT_NAME} PRIVATE -march=native)
endif()
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
target_compile_options(${PROJECT_NAME} PRIVATE -fbracket-depth=1500)
endif()
2 changes: 1 addition & 1 deletion include/llama/Tuple.hpp
@@ -111,7 +111,7 @@ namespace llama
};

template<typename... Elements>
Tuple(Elements...) -> Tuple<std::remove_cv_t<std::remove_reference_t<Elements>>...>;
LLAMA_HOST_ACC Tuple(Elements...)->Tuple<std::remove_cv_t<std::remove_reference_t<Elements>>...>;

template<std::size_t I, typename... Elements>
LLAMA_FN_HOST_ACC_INLINE constexpr auto get(Tuple<Elements...>& tuple) -> auto&
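The deduction guide now carries the same host/device annotation as the rest of the API (LLAMA_HOST_ACC presumably expands to __host__ __device__ when compiling as CUDA); without it, clang as CUDA compiler apparently rejects class template argument deduction in device code. A hypothetical kernel that exercises the guide:

#include <llama/Tuple.hpp>

__global__ void kernel()
{
    llama::Tuple t{1, 2.0f}; // CTAD via the guide deduces llama::Tuple<int, float>
}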
4 changes: 2 additions & 2 deletions include/llama/llama.hpp
@@ -31,7 +31,7 @@

// suppress warnings on missing return statements. we get a lot of these because nvcc/nvc++ have some troubles with if
// constexpr.
#ifdef __CUDACC__
#ifdef __NVCC__
# ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
# pragma nv_diag_suppress 940
# else
@@ -76,7 +76,7 @@
#include "mapping/Split.hpp"
#include "mapping/tree/Mapping.hpp"

#if defined(__CUDACC__)
#if defined(__NVCC__)
# ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
# pragma nv_diag_default 940
# else
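The switch from __CUDACC__ to __NVCC__ is central to supporting clang as CUDA compiler: __CUDACC__ is defined whenever a translation unit is compiled as CUDA, by nvcc and by clang alike, whereas __NVCC__ identifies nvcc itself, the only compiler that understands the nv_diag_* pragmas. A sketch of the distinction (the real code additionally checks __NVCC_DIAG_PRAGMA_SUPPORT__ for older nvcc versions):

#if defined(__CUDACC__)
// compiling CUDA code: true under nvcc *and* under clang in CUDA mode
#endif
#if defined(__NVCC__)
// nvcc specifically: the only safe place for nvcc-only pragmas
#    pragma nv_diag_suppress 940
#endif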
4 changes: 2 additions & 2 deletions include/llama/macros.hpp
@@ -124,7 +124,7 @@
#endif

#ifndef LLAMA_BEGIN_SUPPRESS_HOST_DEVICE_WARNING
# ifdef __CUDACC__
# ifdef __NVCC__
# ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
# define LLAMA_BEGIN_SUPPRESS_HOST_DEVICE_WARNING \
_Pragma("nv_diag_suppress 20011") _Pragma("nv_diag_suppress 20014")
@@ -142,7 +142,7 @@
# endif
#endif
#ifndef LLAMA_END_SUPPRESS_HOST_DEVICE_WARNING
# ifdef __CUDACC__
# ifdef __NVCC__
# ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
# define LLAMA_END_SUPPRESS_HOST_DEVICE_WARNING \
_Pragma("nv_diag_default 20011") _Pragma("nv_diag_default 20014")
4 changes: 2 additions & 2 deletions include/llama/mapping/ChangeType.hpp
@@ -14,12 +14,12 @@ namespace llama::mapping
template<typename UserT, typename StoredT>
struct ChangeTypeProjection
{
static auto load(StoredT v) -> UserT
LLAMA_FN_HOST_ACC_INLINE static auto load(StoredT v) -> UserT
{
return static_cast<UserT>(v); // we could allow stronger casts here
}

static auto store(UserT v) -> StoredT
LLAMA_FN_HOST_ACC_INLINE static auto store(UserT v) -> StoredT
{
return static_cast<StoredT>(v); // we could allow stronger casts here
}
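load and store end up being called from LLAMA's __host__ __device__ accessors and hence from device code; nvcc merely warns about such calls in some situations (the 20011/20014 diagnostics suppressed in macros.hpp above), while clang as CUDA compiler treats them as hard errors once code is generated for the device. A reduced, standalone sketch of why the annotation is needed (assumption: LLAMA_FN_HOST_ACC_INLINE expands to __host__ __device__ plus an inlining hint):

struct Plain
{
    static auto load(int v) -> float { return static_cast<float>(v); } // implicitly host-only
};

struct Annotated
{
    __host__ __device__ static auto load(int v) -> float { return static_cast<float>(v); }
};

__global__ void kernel(float* out)
{
    // out[0] = Plain::load(1);  // error: host function called from device code
    out[0] = Annotated::load(1); // fine: callable on host and device
}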
