Skip to content

Commit

Permalink
Merge pull request #14621 from masterleinad/remove_compiler_cuda_aware
Browse files Browse the repository at this point in the history
  • Loading branch information
masterleinad committed Dec 30, 2022
2 parents 25d263f + 3604e2f commit e20814c
Show file tree
Hide file tree
Showing 20 changed files with 48 additions and 45 deletions.
1 change: 0 additions & 1 deletion doc/doxygen/options.dox.in
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,6 @@ PREDEFINED = DOXYGEN=1 \
DEAL_II_WITH_TASKFLOW=1 \
DEAL_II_WITH_COMPLEX_VALUES=1 \
DEAL_II_WITH_CUDA=1 \
DEAL_II_COMPILER_CUDA_AWARE=1 \
DEAL_II_WITH_GINKGO=1 \
DEAL_II_WITH_GMSH=1 \
DEAL_II_GMSH_WITH_API=1 \
Expand Down
8 changes: 0 additions & 8 deletions include/deal.II/base/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,6 @@
#cmakedefine DEAL_II_RESTRICT @DEAL_II_RESTRICT@
#cmakedefine DEAL_II_COMPILER_HAS_DIAGNOSTIC_PRAGMA

/*
* A variable to tell if the compiler used in the current compilation process
* understands CUDA code.
*/
#if defined(DEAL_II_WITH_CUDA) && defined(__CUDACC__)
# define DEAL_II_COMPILER_CUDA_AWARE
#endif

/***********************************************************************
* CPU features:
*
Expand Down
2 changes: 1 addition & 1 deletion include/deal.II/base/cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <deal.II/base/array_view.h>
#include <deal.II/base/exceptions.h>

#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA
# include <cusolverDn.h>
# include <cusolverSp.h>
# include <cusparse.h>
Expand Down
4 changes: 2 additions & 2 deletions include/deal.II/base/numbers.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

#include <deal.II/base/types.h>

#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA
# include <cuComplex.h>
#endif

Expand Down Expand Up @@ -809,7 +809,7 @@ namespace internal
}
};

#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA
template <>
struct NumberType<cuComplex>
{
Expand Down
24 changes: 9 additions & 15 deletions include/deal.II/base/partitioner.templates.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,7 @@ namespace Utilities
}

Number *temp_array_ptr = temporary_storage.data();
# if defined(DEAL_II_COMPILER_CUDA_AWARE) && \
defined(DEAL_II_MPI_WITH_CUDA_SUPPORT)
# if defined(DEAL_II_WITH_CUDA) && defined(DEAL_II_MPI_WITH_CUDA_SUPPORT)
// When using CUDA-aware MPI, the set of local indices that are ghost
// indices on other processors is expanded in arrays. This is for
// performance reasons as this can significantly decrease the number of
Expand All @@ -121,8 +120,7 @@ namespace Utilities

for (unsigned int i = 0; i < n_import_targets; ++i)
{
# if defined(DEAL_II_COMPILER_CUDA_AWARE) && \
defined(DEAL_II_MPI_WITH_CUDA_SUPPORT)
# if defined(DEAL_II_WITH_CUDA) && defined(DEAL_II_MPI_WITH_CUDA_SUPPORT)
if (std::is_same<MemorySpaceType, MemorySpace::CUDA>::value)
{
const auto chunk_size = import_indices_plain_dev[i].second;
Expand Down Expand Up @@ -228,7 +226,7 @@ namespace Utilities
}
else
{
# ifdef DEAL_II_COMPILER_CUDA_AWARE
# ifdef DEAL_II_WITH_CUDA
cudaError_t cuda_error =
cudaMemcpy(ghost_array.data() + ghost_range.first,
ghost_array.data() + offset,
Expand Down Expand Up @@ -378,7 +376,7 @@ namespace Utilities
}
else
{
# ifdef DEAL_II_COMPILER_CUDA_AWARE
# ifdef DEAL_II_WITH_CUDA
cudaError_t cuda_error =
cudaMemcpy(ghost_array_ptr + offset,
ghost_array.data() + my_ghosts->first,
Expand Down Expand Up @@ -414,8 +412,7 @@ namespace Utilities
ExcMessage("Index overflow: Maximum message size in MPI is 2GB. "
"The number of ghost entries times the size of 'Number' "
"exceeds this value. This is not supported."));
# if defined(DEAL_II_COMPILER_CUDA_AWARE) && \
defined(DEAL_II_MPI_WITH_CUDA_SUPPORT)
# if defined(DEAL_II_WITH_CUDA) && defined(DEAL_II_MPI_WITH_CUDA_SUPPORT)
if (std::is_same<MemorySpaceType, MemorySpace::CUDA>::value)
cudaDeviceSynchronize();
# endif
Expand Down Expand Up @@ -526,7 +523,7 @@ namespace Utilities
"import_from_ghosted_array_start as is passed "
"to import_from_ghosted_array_finish."));

# ifdef DEAL_II_COMPILER_CUDA_AWARE
# ifdef DEAL_II_WITH_CUDA
if (std::is_same<MemorySpaceType, MemorySpace::CUDA>::value)
{
cudaMemset(ghost_array.data(),
Expand Down Expand Up @@ -560,8 +557,7 @@ namespace Utilities
const unsigned int n_import_targets = import_targets_data.size();
const unsigned int n_ghost_targets = ghost_targets_data.size();

# if (defined(DEAL_II_COMPILER_CUDA_AWARE) && \
defined(DEAL_II_MPI_WITH_CUDA_SUPPORT))
# if (defined(DEAL_II_WITH_CUDA) && defined(DEAL_II_MPI_WITH_CUDA_SUPPORT))
// When using CUDA-aware MPI, the set of local indices that are ghost
// indices on other processors is expanded in arrays. This is for
// performance reasons as this can significantly decrease the number of
Expand All @@ -583,8 +579,7 @@ namespace Utilities
AssertThrowMPI(ierr);

const Number *read_position = temporary_storage.data();
# if !(defined(DEAL_II_COMPILER_CUDA_AWARE) && \
defined(DEAL_II_MPI_WITH_CUDA_SUPPORT))
# if !(defined(DEAL_II_WITH_CUDA) && defined(DEAL_II_MPI_WITH_CUDA_SUPPORT))
// If the operation is not an insertion, add the imported data to the
// local values. For insert, nothing is done here (but in debug mode
// we assert that the specified value is either zero or matches with
Expand Down Expand Up @@ -730,8 +725,7 @@ namespace Utilities
{
Assert(ghost_array.begin() != nullptr, ExcInternalError());

# if defined(DEAL_II_COMPILER_CUDA_AWARE) && \
defined(DEAL_II_MPI_WITH_CUDA_SUPPORT)
# if defined(DEAL_II_WITH_CUDA) && defined(DEAL_II_MPI_WITH_CUDA_SUPPORT)
if (std::is_same<MemorySpaceType, MemorySpace::CUDA>::value)
{
Assert(std::is_trivial<Number>::value, ExcNotImplemented());
Expand Down
24 changes: 21 additions & 3 deletions include/deal.II/base/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -1432,8 +1432,12 @@ constexpr DEAL_II_ALWAYS_INLINE
DEAL_II_HOST_DEVICE const typename Tensor<rank_, dim, Number>::value_type &
Tensor<rank_, dim, Number>::operator[](const unsigned int i) const
{
# ifndef DEAL_II_COMPILER_CUDA_AWARE
# if KOKKOS_VERSION < 30700
# ifdef KOKKOS_ACTIVE_MEMORY_SPACE_HOST
AssertIndexRange(i, dim);
# endif
# else
KOKKOS_IF_ON_HOST((AssertIndexRange(i, dim);))
# endif

return values[i];
Expand All @@ -1444,9 +1448,16 @@ template <int rank_, int dim, typename Number>
constexpr inline DEAL_II_ALWAYS_INLINE const Number &
Tensor<rank_, dim, Number>::operator[](const TableIndices<rank_> &indices) const
{
# ifndef DEAL_II_COMPILER_CUDA_AWARE
# if KOKKOS_VERSION < 30700
# ifdef KOKKOS_ACTIVE_MEMORY_SPACE_HOST
Assert(dim != 0,
ExcMessage("Cannot access an object of type Tensor<rank_,0,Number>"));
# endif
# else
KOKKOS_IF_ON_HOST(
(Assert(dim != 0,
ExcMessage(
"Cannot access an object of type Tensor<rank_,0,Number>"));))
# endif

return TensorAccessors::extract<rank_>(*this, indices);
Expand All @@ -1458,9 +1469,16 @@ template <int rank_, int dim, typename Number>
constexpr inline DEAL_II_ALWAYS_INLINE Number &
Tensor<rank_, dim, Number>::operator[](const TableIndices<rank_> &indices)
{
# ifndef DEAL_II_COMPILER_CUDA_AWARE
# if KOKKOS_VERSION < 30700
# ifdef KOKKOS_ACTIVE_MEMORY_SPACE_HOST
Assert(dim != 0,
ExcMessage("Cannot access an object of type Tensor<rank_,0,Number>"));
# endif
# else
KOKKOS_IF_ON_HOST(
(Assert(dim != 0,
ExcMessage(
"Cannot access an object of type Tensor<rank_,0,Number>"));))
# endif

return TensorAccessors::extract<rank_>(*this, indices);
Expand Down
2 changes: 1 addition & 1 deletion include/deal.II/lac/affine_constraints.templates.h
Original file line number Diff line number Diff line change
Expand Up @@ -2256,7 +2256,7 @@ namespace internal
vec.zero_out_ghost_values();
}

#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA
template <typename Number>
__global__ void
set_zero_kernel(const size_type * constrained_dofs,
Expand Down
2 changes: 1 addition & 1 deletion include/deal.II/lac/cuda_atomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include <deal.II/base/config.h>

#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA

DEAL_II_NAMESPACE_OPEN

Expand Down
2 changes: 1 addition & 1 deletion include/deal.II/lac/cuda_kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include <deal.II/base/config.h>

#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA


# include <deal.II/base/cuda_size.h>
Expand Down
2 changes: 1 addition & 1 deletion include/deal.II/lac/cuda_kernels.templates.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

#include <deal.II/lac/cuda_kernels.h>

#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA

DEAL_II_NAMESPACE_OPEN

Expand Down
2 changes: 1 addition & 1 deletion include/deal.II/lac/cuda_precondition.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

#include <memory>

#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA

DEAL_II_NAMESPACE_OPEN

Expand Down
2 changes: 1 addition & 1 deletion include/deal.II/lac/cuda_solver_direct.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include <deal.II/base/config.h>

#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA
# include <deal.II/base/cuda.h>

# include <deal.II/lac/cuda_sparse_matrix.h>
Expand Down
2 changes: 1 addition & 1 deletion include/deal.II/lac/cuda_sparse_matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

#include <iomanip>

#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA
# include <deal.II/base/cuda.h>

# include <deal.II/lac/cuda_vector.h>
Expand Down
4 changes: 2 additions & 2 deletions include/deal.II/lac/precondition.h
Original file line number Diff line number Diff line change
Expand Up @@ -3386,7 +3386,7 @@ namespace internal
}


# ifdef DEAL_II_COMPILER_CUDA_AWARE
# ifdef DEAL_II_WITH_CUDA
template <typename Number>
__global__ void
set_initial_guess_kernel(const types::global_dof_index offset,
Expand Down Expand Up @@ -3425,7 +3425,7 @@ namespace internal
const Number mean_value = vector.mean_value();
vector.add(-mean_value);
}
# endif // DEAL_II_COMPILER_CUDA_AWARE
# endif // DEAL_II_WITH_CUDA

struct EigenvalueTracker
{
Expand Down
2 changes: 1 addition & 1 deletion include/deal.II/matrix_free/cuda_fe_evaluation.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include <deal.II/base/config.h>

#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA

# include <deal.II/base/tensor.h>
# include <deal.II/base/utilities.h>
Expand Down
2 changes: 1 addition & 1 deletion include/deal.II/matrix_free/cuda_hanging_nodes_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include <deal.II/base/config.h>

#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA

# include <deal.II/base/cuda_size.h>

Expand Down
2 changes: 1 addition & 1 deletion include/deal.II/matrix_free/cuda_matrix_free.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

#include <deal.II/base/config.h>

#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA

# include <deal.II/base/cuda_size.h>
# include <deal.II/base/mpi_stub.h>
Expand Down
2 changes: 1 addition & 1 deletion include/deal.II/matrix_free/cuda_matrix_free.templates.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

#include <deal.II/matrix_free/cuda_matrix_free.h>

#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA

# include <deal.II/base/cuda.h>
# include <deal.II/base/cuda_size.h>
Expand Down
2 changes: 1 addition & 1 deletion include/deal.II/matrix_free/cuda_tensor_product_kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

#include <deal.II/matrix_free/cuda_matrix_free.templates.h>

#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA

DEAL_II_NAMESPACE_OPEN

Expand Down
2 changes: 1 addition & 1 deletion tests/tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ struct MPILogInitAll
};


#ifdef DEAL_II_COMPILER_CUDA_AWARE
#ifdef DEAL_II_WITH_CUDA
// By default, all the ranks will try to access the device 0.
// If we are running with MPI support it is better to address different graphics
// cards for different processes even if only one node is used. The choice below
Expand Down

0 comments on commit e20814c

Please sign in to comment.