Merge pull request #14571 from masterleinad/kokkos_la_d_vector_device_aware_mpi
dealii · Jan 27, 2023 · cd731cf · cd731cf
2 parents 22d2322 + c027dd2
commit cd731cf
Show file tree

Hide file tree

Showing 22 changed files with 542 additions and 569 deletions.
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
@@ -194,6 +194,7 @@ jobs:
               -D DEAL_II_WITH_KOKKOS="ON" \
               -D KOKKOS_DIR=${GITHUB_WORKSPACE}/../kokkos-install \
               -D DEAL_II_WITH_MPI="ON" \
+              -D DEAL_II_MPI_WITH_DEVICE_SUPPORT="ON" \
               -D DEAL_II_WITH_P4EST="ON" \
               -D DEAL_II_COMPONENT_EXAMPLES="ON" \
               ..

diff --git a/cmake/config/template-arguments.in b/cmake/config/template-arguments.in
@@ -73,6 +73,16 @@ MPI_SCALARS     := { int;
                      @DEAL_II_EXPAND_COMPLEX_SCALARS@;
                    }
 
+// complex types and long double are typically not directly supported on GPUs
+MPI_DEVICE_SCALARS := { int;
+                        long int;
+                        unsigned int;
+                        unsigned long int;
+                        unsigned long long int;
+                        float;
+                        double;
+                      }
+
 // template names for serial vectors that we can instantiate as T<S> where
 // S=REAL_SCALARS for example
 DEAL_II_VEC_TEMPLATES := { Vector; BlockVector }

diff --git a/cmake/configure/configure_10_mpi.cmake b/cmake/configure/configure_10_mpi.cmake
@@ -65,8 +65,12 @@ macro(feature_mpi_configure_external)
   # (in Modules/FindMPI.cmake) at some point. For the time being this is an
   # advanced configuration option.
   #
-  option(DEAL_II_MPI_WITH_CUDA_SUPPORT "Enable MPI Cuda support" OFF)
-  mark_as_advanced(DEAL_II_MPI_WITH_CUDA_SUPPORT)
+  if(DEAL_II_MPI_WITH_CUDA_SUPPORT)
+    option(DEAL_II_MPI_WITH_DEVICE_SUPPORT "Enable MPI Device support" ON)
+  else()
+    option(DEAL_II_MPI_WITH_DEVICE_SUPPORT "Enable MPI Device support" OFF)
+  endif()
+  mark_as_advanced(DEAL_II_MPI_WITH_DEVICE_SUPPORT)
 endmacro()
 
 macro(feature_mpi_error_message)
@@ -90,8 +94,8 @@ configure_feature(MPI)
 
 if(NOT DEAL_II_WITH_MPI)
   #
-  # Disable and hide the DEAL_II_MPI_WITH_CUDA_SUPPORT option
+  # Disable and hide the DEAL_II_MPI_WITH_DEVICE_SUPPORT option
   #
-  set(DEAL_II_MPI_WITH_CUDA_SUPPORT)
-  unset(DEAL_II_MPI_WITH_CUDA_SUPPORT CACHE)
+  set(DEAL_II_MPI_WITH_DEVICE_SUPPORT)
+  unset(DEAL_II_MPI_WITH_DEVICE_SUPPORT CACHE)
 endif()
diff --git a/doc/doxygen/options.dox.in b/doc/doxygen/options.dox.in
@@ -209,7 +209,7 @@ PREDEFINED             = DOXYGEN=1 \
                          DEAL_II_LAPACK_WITH_MKL=1 \
                          DEAL_II_WITH_METIS=1 \
                          DEAL_II_WITH_MPI=1 \
-                         DEAL_II_MPI_WITH_CUDA_SUPPORT=1 \
+                         DEAL_II_MPI_WITH_DEVICE_SUPPORT=1 \
                          DEAL_II_MPI_VERSION_MAJOR=3 \
                          DEAL_II_MPI_VERSION_MINOR=0 \
                          DEAL_II_WITH_MUPARSER=1 \

diff --git a/doc/external-libs/cuda.html b/doc/external-libs/cuda.html
@@ -46,7 +46,7 @@ <h1>Installing deal.II with CUDA</h1>
 
         -DDEAL_II_WITH_CUDA=ON
         -DDEAL_II_WITH_MPI=ON
-        -DDEAL_II_MPI_WITH_CUDA_SUPPORT=ON
+        -DDEAL_II_MPI_WITH_DEVICE_SUPPORT=ON
       </pre>
       Note, that there is no check that detects if the MPI implementation
       really is CUDA-aware. Activating this flag for incompatible MPI libraries

diff --git a/examples/step-64/step-64.cc b/examples/step-64/step-64.cc
@@ -358,8 +358,8 @@ namespace Step64
     // memory space to use. There is also LinearAlgebra::CUDAWrappers::Vector
     // that always uses GPU memory storage but doesn't work with MPI. It might
     // be worth noticing that the communication between different MPI processes
-    // can be improved if the MPI implementation is CUDA-aware and the configure
-    // flag `DEAL_II_MPI_WITH_CUDA_SUPPORT` is enabled. (The value of this
+    // can be improved if the MPI implementation is GPU-aware and the configure
+    // flag `DEAL_II_MPI_WITH_DEVICE_SUPPORT` is enabled. (The value of this
     // flag needs to be set at the time you call `cmake` when installing
     // deal.II.)
     //

diff --git a/include/deal.II/base/config.h.in b/include/deal.II/base/config.h.in
@@ -448,7 +448,10 @@
 #  define DEAL_II_MPI_VERSION_GTE(major,minor) false
 #endif
 
+#cmakedefine DEAL_II_MPI_WITH_DEVICE_SUPPORT
+#ifdef DEAL_II_MPI_WITH_DEVICE_SUPPORT
 #cmakedefine DEAL_II_MPI_WITH_CUDA_SUPPORT
+#endif
 
 /***********************************************************************
  * Two macro names that we put at the top and bottom of all deal.II files

diff --git a/include/deal.II/base/partitioner.h b/include/deal.II/base/partitioner.h
@@ -671,7 +671,7 @@ namespace Utilities
     private:
       /**
        * Initialize import_indices_plain_dev from import_indices_data. This
-       * function is only used when using CUDA-aware MPI.
+       * function is only used when using device-aware MPI.
        */
       void
       initialize_import_indices_plain_dev() const;
@@ -722,15 +722,13 @@ namespace Utilities
       /**
        * The set of (local) indices that we are importing during compress(),
        * i.e., others' ghosts that belong to the local range. The data stored is
-       * the same than in import_indices_data but the data is expanded in plain
-       * arrays. This variable is only used when using CUDA-aware MPI.
+       * the same as in import_indices_data but the data is expanded in plain
+       * arrays. This variable is only used when using device-aware MPI.
        */
       // The variable is mutable to enable lazy initialization in
-      // export_to_ghosted_array_start(). This way partitioner does not have to
-      // be templated on the MemorySpaceType.
+      // export_to_ghosted_array_start().
       mutable std::vector<
-        std::pair<std::unique_ptr<unsigned int[], void (*)(unsigned int *)>,
-                  unsigned int>>
+        Kokkos::View<unsigned int *, MemorySpace::Default::kokkos_space>>
         import_indices_plain_dev;
 
       /**