ginkgo-project · yhmtsai · Aug 18, 2021 · Aug 17, 2021 · Aug 17, 2021 · Aug 18, 2021
diff --git a/common/cuda_hip/multigrid/amgx_pgm_kernels.hpp.inc b/common/cuda_hip/multigrid/amgx_pgm_kernels.hpp.inc
@@ -47,7 +47,7 @@ __global__ __launch_bounds__(default_block_size) void match_edge_kernel(
     }
     auto neighbor = strongest_neighbor_vals[tidx];
     if (neighbor != -1 && strongest_neighbor_vals[neighbor] == tidx &&
-        tidx < neighbor) {
+        tidx <= neighbor) {
         // Use the smaller index as agg point
         agg_vals[tidx] = tidx;
         agg_vals[neighbor] = tidx;

diff --git a/core/multigrid/amgx_pgm.cpp b/core/multigrid/amgx_pgm.cpp
@@ -93,8 +93,8 @@ void AmgxPgm<ValueType, IndexType>::generate()
     // Initial agg = -1
     exec->run(amgx_pgm::make_fill_array(agg_.get_data(), agg_.get_num_elems(),
                                         -one<IndexType>()));
-    IndexType num_unagg{0};
-    IndexType num_unagg_prev{0};
+    IndexType num_unagg = num_rows;
+    IndexType num_unagg_prev = num_rows;
     // TODO: if mtx is a hermitian matrix, weight_mtx = abs(mtx)
     // compute weight_mtx = (abs(mtx) + abs(mtx'))/2;
     auto abs_mtx = amgxpgm_op->compute_absolute();
@@ -128,9 +128,11 @@ void AmgxPgm<ValueType, IndexType>::generate()
         // copy the agg to intermediate_agg
         intermediate_agg = agg_;
     }
-    // Assign all left points
-    exec->run(amgx_pgm::make_assign_to_exist_agg(weight_mtx.get(), diag.get(),
-                                                 agg_, intermediate_agg));
+    if (num_unagg != 0) {
+        // Assign all remaining unaggregated points
+        exec->run(amgx_pgm::make_assign_to_exist_agg(
+            weight_mtx.get(), diag.get(), agg_, intermediate_agg));
+    }
     IndexType num_agg = 0;
     // Renumber the index
     exec->run(amgx_pgm::make_renumber(agg_, &num_agg));
@@ -153,9 +155,9 @@ void AmgxPgm<ValueType, IndexType>::generate()
     // TODO: use less memory footprint to improve it
     auto coarse_matrix =
         share(matrix_type::create(exec, gko::dim<2>{coarse_dim, coarse_dim}));
-    auto tmp = matrix_type::create(exec, gko::dim<2>{coarse_dim, fine_dim});
-    restrict_op->apply(amgxpgm_op, tmp.get());
-    tmp->apply(prolong_op.get(), coarse_matrix.get());
+    auto tmp = matrix_type::create(exec, gko::dim<2>{fine_dim, coarse_dim});
+    amgxpgm_op->apply(prolong_op.get(), tmp.get());
+    restrict_op->apply(tmp.get(), coarse_matrix.get());
 
     this->set_multigrid_level(prolong_op, coarse_matrix, restrict_op);
 }

diff --git a/cuda/preconditioner/jacobi_kernels.cu b/cuda/preconditioner/jacobi_kernels.cu
@@ -65,8 +65,6 @@ constexpr int default_num_warps = 32;
 // current GPUs have at most 84 SMs)
 constexpr int default_grid_size = 32 * 32 * 128;
 
-constexpr int default_block_size = 512;
-
 
 #include "common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc"
 

diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp
@@ -127,8 +127,11 @@ class AmgxPgm : public ::testing::Test {
         int nrhs = 3;
 
         agg = gen_agg_array(m, n);
-        unfinished_agg = gen_array(m, -1, n - 1);
-        strongest_neighbor = gen_array(m, 0, n - 1);
+        // only use 0 ~ n-2; keep entry n-1 isolated and not yet finished
+        unfinished_agg = gen_array(m, -1, n - 2);
+        unfinished_agg.get_data()[n - 1] = -1;
+        strongest_neighbor = gen_array(m, 0, n - 2);
+        strongest_neighbor.get_data()[n - 1] = n - 1;
         coarse_vector = gen_mtx(n, nrhs);
         fine_vector = gen_mtx(m, nrhs);
         auto weight = gen_mtx(m, m);

diff --git a/hip/preconditioner/jacobi_kernels.hip.cpp b/hip/preconditioner/jacobi_kernels.hip.cpp
@@ -72,7 +72,6 @@ constexpr int default_num_warps = 32;
 // current GPUs have at most 84 SMs)
 constexpr int default_grid_size = 32 * 32 * 128;
 
-constexpr int default_block_size = 512;
 
 #include "common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc"
 

diff --git a/hip/test/multigrid/amgx_pgm_kernels.cpp b/hip/test/multigrid/amgx_pgm_kernels.cpp
@@ -126,8 +126,11 @@ class AmgxPgm : public ::testing::Test {
         int nrhs = 3;
 
         agg = gen_agg_array(m, n);
-        unfinished_agg = gen_array(m, -1, n - 1);
-        strongest_neighbor = gen_array(m, 0, n - 1);
+        // only use 0 ~ n-2; keep entry n-1 isolated and not yet finished
+        unfinished_agg = gen_array(m, -1, n - 2);
+        unfinished_agg.get_data()[n - 1] = -1;
+        strongest_neighbor = gen_array(m, 0, n - 2);
+        strongest_neighbor.get_data()[n - 1] = n - 1;
         coarse_vector = gen_mtx(n, nrhs);
         fine_vector = gen_mtx(m, nrhs);
         auto weight = gen_mtx(m, m);

diff --git a/include/ginkgo/core/multigrid/amgx_pgm.hpp b/include/ginkgo/core/multigrid/amgx_pgm.hpp
@@ -121,15 +121,15 @@ class AmgxPgm : public EnableLinOp<AmgxPgm<ValueType, IndexType>>,
          * NVIDIA AMGX Reference Manual (October 2017, API Version 2,
          * https://github.com/NVIDIA/AMGX/blob/main/doc/AMGX_Reference.pdf).
          */
-        unsigned GKO_FACTORY_PARAMETER(max_iterations, 15u);
+        unsigned GKO_FACTORY_PARAMETER_SCALAR(max_iterations, 15u);
 
         /**
          * The maximum ratio of unassigned number, which is valid in the
          * interval 0.0 ~ 1.0. We use the same default value as NVIDIA AMGX
          * Reference Manual (October 2017, API Version 2,
          * https://github.com/NVIDIA/AMGX/blob/main/doc/AMGX_Reference.pdf).
          */
-        double GKO_FACTORY_PARAMETER(max_unassigned_ratio, 0.05);
+        double GKO_FACTORY_PARAMETER_SCALAR(max_unassigned_ratio, 0.05);
 
         /**
          * Use the deterministic assign_to_exist_agg method or not.
@@ -138,7 +138,7 @@ class AmgxPgm : public EnableLinOp<AmgxPgm<ValueType, IndexType>>,
          * from the same matrix. Otherwise, the aggregated group might be
          * different depending on the execution ordering.
          */
-        bool GKO_FACTORY_PARAMETER(deterministic, false);
+        bool GKO_FACTORY_PARAMETER_SCALAR(deterministic, false);
     };
     GKO_ENABLE_LIN_OP_FACTORY(AmgxPgm, parameters, Factory);
     GKO_ENABLE_BUILD_METHOD(Factory);

diff --git a/include/ginkgo/core/multigrid/multigrid_level.hpp b/include/ginkgo/core/multigrid/multigrid_level.hpp
@@ -46,6 +46,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 namespace gko {
+/**
+ * @brief The multigrid components namespace.
+ *
+ * @ingroup gko
+ */
 namespace multigrid {
 
 

diff --git a/omp/multigrid/amgx_pgm_kernels.cpp b/omp/multigrid/amgx_pgm_kernels.cpp
@@ -78,7 +78,7 @@ void match_edge(std::shared_ptr<const OmpExecutor> exec,
         if (agg_vals[i] == -1) {
             auto neighbor = strongest_neighbor_vals[i];
             if (neighbor != -1 && strongest_neighbor_vals[neighbor] == i &&
-                i < neighbor) {
+                i <= neighbor) {
                 // Use the smaller index as agg point
                 agg_vals[i] = i;
                 agg_vals[neighbor] = i;

diff --git a/omp/test/multigrid/amgx_pgm_kernels.cpp b/omp/test/multigrid/amgx_pgm_kernels.cpp
@@ -117,8 +117,11 @@ class AmgxPgm : public ::testing::Test {
         int nrhs = 3;
 
         agg = gen_agg_array(m, n);
-        unfinished_agg = gen_array(m, -1, n - 1);
-        strongest_neighbor = gen_array(m, 0, n - 1);
+        // only use 0 ~ n-2; keep entry n-1 isolated and not yet finished
+        unfinished_agg = gen_array(m, -1, n - 2);
+        unfinished_agg.get_data()[n - 1] = -1;
+        strongest_neighbor = gen_array(m, 0, n - 2);
+        strongest_neighbor.get_data()[n - 1] = n - 1;
         coarse_vector = gen_mtx(n, nrhs);
         fine_vector = gen_mtx(m, nrhs);
         auto weight = gen_mtx(m, m);

diff --git a/reference/multigrid/amgx_pgm_kernels.cpp b/reference/multigrid/amgx_pgm_kernels.cpp
@@ -74,7 +74,7 @@ void match_edge(std::shared_ptr<const ReferenceExecutor> exec,
             auto neighbor = strongest_neighbor_vals[i];
             // i < neighbor always holds when neighbor is not -1
             if (neighbor != -1 && strongest_neighbor_vals[neighbor] == i &&
-                i < neighbor) {
+                i <= neighbor) {
                 // Use the smaller index as agg point
                 agg_vals[i] = i;
                 agg_vals[neighbor] = i;

diff --git a/reference/test/multigrid/amgx_pgm_kernels.cpp b/reference/test/multigrid/amgx_pgm_kernels.cpp
@@ -304,15 +304,17 @@ TYPED_TEST(AmgxPgm, MatchEdge)
     snb_val[1] = 0;
     snb_val[2] = 0;
     snb_val[3] = 1;
-    snb_val[4] = 2;
+    // isolated item: its strongest neighbor is itself
+    snb_val[4] = 4;
 
     gko::kernels::reference::amgx_pgm::match_edge(this->exec, snb, agg);
 
     ASSERT_EQ(agg_val[0], 0);
     ASSERT_EQ(agg_val[1], -1);
     ASSERT_EQ(agg_val[2], 0);
     ASSERT_EQ(agg_val[3], -1);
-    ASSERT_EQ(agg_val[4], -1);
+    // an isolated item should be aggregated with itself
+    ASSERT_EQ(agg_val[4], 4);
 }