precommit: Add file extension .cc
oschuett committed Sep 15, 2021
1 parent 524554b commit 1b9c42d
Showing 5 changed files with 158 additions and 145 deletions.
62 changes: 33 additions & 29 deletions src/grid/hip/grid_hip_collocate.cc
@@ -135,8 +135,9 @@ __global__ void calculate_coefficients(const kernel_params dev_) {
calculate_coefficients. We only keep the non-zero elements to save memory.
*/

template <typename T, typename T3, bool distributed__, bool orthorhombic_>
__global__ __launch_bounds__(64) void collocate_kernel(const kernel_params dev_) {
template <typename T, typename T3, bool distributed__, bool orthorhombic_>
__global__
__launch_bounds__(64) void collocate_kernel(const kernel_params dev_) {
// Copy task from global to shared memory and precompute some stuff.
__shared__ smem_task_reduced<T, T3> task;

@@ -180,11 +181,11 @@ __global__ __launch_bounds__(64) void collocate_kernel(const kernel_params dev_)

if (distributed__) {
if (task.apply_border_mask) {
compute_window_size(dev_.grid_local_size_, dev_.grid_lower_corner_,
dev_.grid_full_size_, // also full size of the grid
dev_.tasks[dev_.first_task + blockIdx.x].border_mask,
dev_.grid_border_width_, &task.window_size,
&task.window_shift);
compute_window_size(
dev_.grid_local_size_, dev_.grid_lower_corner_,
dev_.grid_full_size_, // also full size of the grid
dev_.tasks[dev_.first_task + blockIdx.x].border_mask,
dev_.grid_border_width_, &task.window_size, &task.window_shift);
}
}
}
@@ -200,10 +201,10 @@ __global__ __launch_bounds__(64) void collocate_kernel(const kernel_params dev_)
// check if the point is within the window
if (task.apply_border_mask) {
// this test is only relevant when the grid is split over several mpi
// ranks. in that case we take only the points contributing to local part
// of the grid.
// ranks. in that case we take only the points contributing to local
// part of the grid.
if ((z2 < task.window_shift.z) || (z2 > task.window_size.z)) {
continue;
continue;
}
}
}
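The border-mask test above only matters when the grid is distributed over several MPI ranks: each rank keeps only the grid points that fall into its locally owned window. A minimal sketch of that per-axis test, with an illustrative helper name that is not part of the CP2K sources:

// Sketch: keep a grid index z2 only if it lies inside the locally owned
// window [window_shift, window_size] (inclusive), as the kernel test implies.
inline bool in_local_window(int z2, int window_shift, int window_size) {
  return (z2 >= window_shift) && (z2 <= window_size);
}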
@@ -274,16 +275,15 @@ __global__ __launch_bounds__(64) void collocate_kernel(const kernel_params dev_)
r3.y = (y + task.lb_cube.y + task.roffset.y) * dh_[4];
r3.z = (z + task.lb_cube.z + task.roffset.z) * dh_[8];
} else {
r3 =
compute_coordinates(dh_, (x + task.lb_cube.x + task.roffset.x),
(y + task.lb_cube.y + task.roffset.y),
(z + task.lb_cube.z + task.roffset.z));
r3 = compute_coordinates(dh_, (x + task.lb_cube.x + task.roffset.x),
(y + task.lb_cube.y + task.roffset.y),
(z + task.lb_cube.z + task.roffset.z));
}

if (distributed__) {
// check if the point is inside the sphere or not. Note that it does not
// apply for the orthorhombic case when the full sphere is inside the
// region of interest.
// check if the point is inside the sphere or not. Note that it does
// not apply for the orthorhombic case when the full sphere is inside
// the region of interest.

if (((task.radius * task.radius) <=
(r3.x * r3.x + r3.y * r3.y + r3.z * r3.z)) &&
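The comment above describes the truncation-sphere test for distributed grids: points whose distance from the Gaussian center reaches the task radius contribute nothing and are skipped, except in the orthorhombic case where the whole cube is known to lie inside the region of interest. A hedged sketch of that check (the helper name is illustrative only):

// Sketch: true when the point at relative position r3 lies on or outside the
// truncation sphere and can therefore be skipped.
__device__ inline bool outside_sphere(const double3 &r3, double radius) {
  return radius * radius <= r3.x * r3.x + r3.y * r3.y + r3.z * r3.z;
}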
@@ -408,32 +408,36 @@ void context_info::collocate_one_grid_level(const int level,

if (func == GRID_FUNC_AB) {
calculate_coefficients<double, true>
<<<number_of_tasks_per_level_[level], threads_per_block, smem_params.smem_per_block(), level_streams[level]>>>(
params);
<<<number_of_tasks_per_level_[level], threads_per_block,
smem_params.smem_per_block(), level_streams[level]>>>(params);
} else {
calculate_coefficients<double, false>
<<<number_of_tasks_per_level_[level], threads_per_block, smem_params.smem_per_block(), level_streams[level]>>>(
params);
<<<number_of_tasks_per_level_[level], threads_per_block,
smem_params.smem_per_block(), level_streams[level]>>>(params);
}

if (grid_[level].is_distributed()) {
if (grid_[level].is_orthorhombic())
collocate_kernel<double, double3, true, true>
<<<number_of_tasks_per_level_[level], threads_per_block, smem_params.cxyz_len() * sizeof(double),
level_streams[level]>>>(params);
<<<number_of_tasks_per_level_[level], threads_per_block,
smem_params.cxyz_len() * sizeof(double), level_streams[level]>>>(
params);
else
collocate_kernel<double, double3, true, false>
<<<number_of_tasks_per_level_[level], threads_per_block, smem_params.cxyz_len() * sizeof(double),
level_streams[level]>>>(params);
<<<number_of_tasks_per_level_[level], threads_per_block,
smem_params.cxyz_len() * sizeof(double), level_streams[level]>>>(
params);
} else {
if (grid_[level].is_orthorhombic())
collocate_kernel<double, double3, false, true>
<<<number_of_tasks_per_level_[level], threads_per_block, smem_params.cxyz_len() * sizeof(double),
level_streams[level]>>>(params);
<<<number_of_tasks_per_level_[level], threads_per_block,
smem_params.cxyz_len() * sizeof(double), level_streams[level]>>>(
params);
else
collocate_kernel<double, double3, false, false>
<<<number_of_tasks_per_level_[level], threads_per_block, smem_params.cxyz_len() * sizeof(double),
level_streams[level]>>>(params);
<<<number_of_tasks_per_level_[level], threads_per_block,
smem_params.cxyz_len() * sizeof(double), level_streams[level]>>>(
params);
}
}
} // namespace rocm_backend
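In collocate_one_grid_level above, the distributed and orthorhombic flags select one of four collocate_kernel instantiations, all launched with the same block count, thread shape, shared-memory size, and stream. A minimal sketch of that dispatch pattern, assuming collocate_kernel and kernel_params from grid_hip_collocate.cc are in scope; launch_collocate and dispatch_collocate are hypothetical helpers, not part of the commit:

// Hypothetical helpers: map the two runtime flags onto the four compile-time
// kernel variants and reuse one launch configuration for all of them.
template <bool distributed, bool orthorhombic>
static void launch_collocate(const kernel_params &params, int blocks,
                             const dim3 &threads, size_t smem,
                             hipStream_t stream) {
  collocate_kernel<double, double3, distributed, orthorhombic>
      <<<blocks, threads, smem, stream>>>(params);
}

static void dispatch_collocate(bool distributed, bool orthorhombic,
                               const kernel_params &params, int blocks,
                               const dim3 &threads, size_t smem,
                               hipStream_t stream) {
  if (distributed) {
    if (orthorhombic)
      launch_collocate<true, true>(params, blocks, threads, smem, stream);
    else
      launch_collocate<true, false>(params, blocks, threads, smem, stream);
  } else {
    if (orthorhombic)
      launch_collocate<false, true>(params, blocks, threads, smem, stream);
    else
      launch_collocate<false, false>(params, blocks, threads, smem, stream);
  }
}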
75 changes: 38 additions & 37 deletions src/grid/hip/grid_hip_context.cc
@@ -188,7 +188,7 @@ extern "C" void grid_hip_create_task_list(

tasks_host[i].block_transposed = (iatom > jatom);
tasks_host[i].subblock_offset =
(tasks_host[i].block_transposed)
(tasks_host[i].block_transposed)
? (tasks_host[i].sgfa * tasks_host[i].nsgfb + tasks_host[i].sgfb)
: (tasks_host[i].sgfb * tasks_host[i].nsgfa + tasks_host[i].sgfa);

@@ -199,13 +199,15 @@
/* this block is only as temporary scratch for calculating the coefficients.
* Doing this avoids a lot of atomic operations that are costly on hardware
* that only have partial support of them. For better performance we should
* most probably align the offsets as well. it is 256 bytes on Mi100 and above */
* most probably align the offsets as well. it is 256 bytes on Mi100 and
* above */
tasks_host[i].lp_max = tasks_host[i].lb_max + tasks_host[i].la_max + 6;
if (i == 0) {
tasks_host[i].coef_offset = 0;
} else {
tasks_host[i].coef_offset = tasks_host[i - 1].coef_offset +
rocm_backend::ncoset(tasks_host[i - 1].lp_max);
tasks_host[i].coef_offset =
tasks_host[i - 1].coef_offset +
rocm_backend::ncoset(tasks_host[i - 1].lp_max);
}
coef_size += rocm_backend::ncoset(tasks_host[i].lp_max);
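The coef_offset assignments above form an exclusive prefix sum over the per-task coefficient counts, and the comment notes that aligning each offset (256 bytes on MI100 and newer) might further help performance. A minimal sketch under those assumptions; align_up is a hypothetical helper, not part of the commit:

// Sketch: exclusive scan of ncoset(lp_max) over the tasks; the commented-out
// line marks where 256-byte alignment of each offset could be inserted.
size_t offset = 0;
for (size_t i = 0; i < tasks_host.size(); i++) {
  tasks_host[i].coef_offset = offset;
  offset += rocm_backend::ncoset(tasks_host[i].lp_max);
  // offset = align_up(offset, 256 / sizeof(double)); // hypothetical alignment
}
const size_t coef_size = offset; // total scratch size in units of double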

@@ -215,44 +217,43 @@
tasks_host[i].apply_border_mask = (tasks_host[i].border_mask != 0);

if (grid.is_orthorhombic() && (tasks_host[i].border_mask == 0)) {
tasks_host[i]
.discrete_radius = rocm_backend::compute_cube_properties<double, double3, true>(
tasks_host[i].radius, grid.dh(), grid.dh_inv(),
(double3 *)tasks_host[i].rp, // center of the gaussian
&tasks_host[i].roffset, // offset compared to the closest grid point
&tasks_host[i].cube_center, // center coordinates in grid space
&tasks_host[i].lb_cube, // lower boundary
&tasks_host[i].cube_size);
tasks_host[i].discrete_radius =
rocm_backend::compute_cube_properties<double, double3, true>(
tasks_host[i].radius, grid.dh(), grid.dh_inv(),
(double3 *)tasks_host[i].rp, // center of the gaussian
&tasks_host[i]
.roffset, // offset compared to the closest grid point
&tasks_host[i].cube_center, // center coordinates in grid space
&tasks_host[i].lb_cube, // lower boundary
&tasks_host[i].cube_size);
} else {
tasks_host[i]
.discrete_radius = rocm_backend::compute_cube_properties<double, double3, false>(
tasks_host[i].radius, grid.dh(), grid.dh_inv(),
(double3 *)tasks_host[i].rp, // center of the gaussian
&tasks_host[i].roffset, // offset compared to the closest grid point
&tasks_host[i].cube_center, // center coordinates in grid space
&tasks_host[i].lb_cube, // lower boundary
&tasks_host[i].cube_size);
tasks_host[i].discrete_radius =
rocm_backend::compute_cube_properties<double, double3, false>(
tasks_host[i].radius, grid.dh(), grid.dh_inv(),
(double3 *)tasks_host[i].rp, // center of the gaussian
&tasks_host[i]
.roffset, // offset compared to the closest grid point
&tasks_host[i].cube_center, // center coordinates in grid space
&tasks_host[i].lb_cube, // lower boundary
&tasks_host[i].cube_size);
}
}

// we need to sort the task list although I expect it to be sorted already
/*
* sorting with this lambda does not work
std::sort(tasks_host.begin(), tasks_host.end(), [](rocm_backend::task_info a, rocm_backend::task_info b) {
if (a.level == b.level) {
if (a.block_num <= b.block_num)
return true;
else
return false;
} else {
return (a.level < b.level);
}
});
*/
/*
* sorting with this lambda does not work
std::sort(tasks_host.begin(), tasks_host.end(), [](rocm_backend::task_info a,
rocm_backend::task_info b) { if (a.level == b.level) { if (a.block_num <=
b.block_num) return true; else return false; } else { return (a.level <
b.level);
}
});
*/
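The commented-out lambda above returns true when two tasks share the same level and block_num, so it is not a strict weak ordering, which is enough to make std::sort misbehave. A hedged alternative, offered only as a sketch, compares (level, block_num) lexicographically via std::tie:

// Sketch: std::tie yields a valid strict weak ordering over (level, block_num).
// Requires <algorithm> and <tuple>.
std::sort(tasks_host.begin(), tasks_host.end(),
          [](const rocm_backend::task_info &a,
             const rocm_backend::task_info &b) {
            return std::tie(a.level, a.block_num) <
                   std::tie(b.level, b.block_num);
          });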
// it is an exclusive scan actually
for (int level = 1; level < ctx->number_of_tasks_per_level_.size(); level++) {
ctx->first_task_per_level_[level] = ctx->first_task_per_level_[level - 1] +
ctx->number_of_tasks_per_level_[level - 1];
ctx->first_task_per_level_[level] =
ctx->first_task_per_level_[level - 1] +
ctx->number_of_tasks_per_level_[level - 1];
}

ctx->tasks_dev.clear();
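As the comment above says, the first_task_per_level_ loop is an exclusive scan; since C++17 the same computation is available as std::exclusive_scan in <numeric>. A minimal sketch, assuming both members are std::vector<int> of equal length:

// Sketch: exclusive prefix sum over the per-level task counts (C++17, <numeric>).
std::exclusive_scan(ctx->number_of_tasks_per_level_.begin(),
                    ctx->number_of_tasks_per_level_.end(),
                    ctx->first_task_per_level_.begin(), 0);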
@@ -323,7 +324,7 @@ std::sort(tasks_host.begin(), tasks_host.end(), [](rocm_backend::task_info a, ro
ctx->num_tasks_per_block_dev_.resize(num_tasks_per_block.size());
ctx->num_tasks_per_block_dev_.copy_to_gpu(num_tasks_per_block);

// collect stats
// collect stats
memset(ctx->stats, 0, 2 * 20 * sizeof(int));
for (int itask = 0; itask < ntasks; itask++) {
const int iatom = iatom_list[itask] - 1;
@@ -440,7 +441,7 @@ extern "C" void grid_hip_integrate_task_list(

rocm_backend::context_info *ctx = (rocm_backend::context_info *)ptr;

if(ptr == nullptr)
if (ptr == nullptr)
return;
assert(ctx->nlevels == nlevels);

