Remove n_filled_lanes_last_batch

dealii · May 1, 2023 · 9e42a0d
1 parent b325713
commit 9e42a0d
Showing 1 changed file with 16 additions and 40 deletions.
diff --git a/include/deal.II/matrix_free/fe_point_evaluation.h b/include/deal.II/matrix_free/fe_point_evaluation.h
@@ -977,7 +977,7 @@ class FEPointEvaluation
   static constexpr std::size_t n_lanes_internal =
     internal::VectorizedArrayTrait<VectorizedArrayType>::width;
   static constexpr std::size_t stride =
-    dealii::internal::VectorizedArrayTrait<Number>::stride;
+    internal::VectorizedArrayTrait<Number>::stride;
 
   /**
    * Common setup function for both constructors. Does the setup for both fast
@@ -1037,11 +1037,6 @@ class FEPointEvaluation
    */
   const unsigned int n_q_points_scalar;
 
-  /**
-   * Number of active quadrature points of the last quadrature point batch.
-   */
-  const unsigned int n_filled_lanes_last_batch;
-
   /**
    * Pointer to the Mapping object passed to the constructor.
    */
@@ -1222,7 +1217,6 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::FEPointEvaluation(
   const unsigned int        first_selected_component)
   : n_q_points(numbers::invalid_unsigned_int)
   , n_q_points_scalar(numbers::invalid_unsigned_int)
-  , n_filled_lanes_last_batch(numbers::invalid_unsigned_int)
   , mapping(&mapping)
   , fe(&fe)
   , update_flags(update_flags)
@@ -1247,7 +1241,6 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::FEPointEvaluation(
   const unsigned int                               first_selected_component)
   : n_q_points(numbers::invalid_unsigned_int)
   , n_q_points_scalar(numbers::invalid_unsigned_int)
-  , n_filled_lanes_last_batch(numbers::invalid_unsigned_int)
   , mapping(&mapping_info.get_mapping())
   , fe(&fe)
   , update_flags(mapping_info.get_update_flags())
@@ -1268,7 +1261,6 @@ FEPointEvaluation<n_components_, dim, spacedim, Number>::FEPointEvaluation(
   FEPointEvaluation<n_components_, dim, spacedim, Number> &other) noexcept
   : n_q_points(other.n_q_points)
   , n_q_points_scalar(other.n_q_points_scalar)
-  , n_filled_lanes_last_batch(other.n_filled_lanes_last_batch)
   , mapping(other.mapping)
   , fe(other.fe)
   , poly(other.poly)
@@ -1308,7 +1300,6 @@ FEPointEvaluation<n_components_, dim, spacedim, Number>::FEPointEvaluation(
   FEPointEvaluation<n_components_, dim, spacedim, Number> &&other) noexcept
   : n_q_points(other.n_q_points)
   , n_q_points_scalar(other.n_q_points_scalar)
-  , n_filled_lanes_last_batch(other.n_filled_lanes_last_batch)
   , mapping(other.mapping)
   , fe(other.fe)
   , poly(other.poly)
@@ -1492,12 +1483,9 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::do_reinit()
     mapping_info->get_n_q_points_unvectorized(current_cell_index,
                                               current_face_number);
 
-  const_cast<unsigned int &>(n_filled_lanes_last_batch) =
-    n_q_points_scalar % n_lanes_user_interface;
   const_cast<unsigned int &>(n_q_points) =
-    n_q_points_scalar / n_lanes_user_interface;
-  if (n_filled_lanes_last_batch > 0)
-    ++const_cast<unsigned int &>(n_q_points);
+    n_q_points_scalar / n_lanes_user_interface +
+    (n_q_points_scalar % n_lanes_user_interface > 0 ? 1 : 0);
 
   // set unit point pointer
   const unsigned int unit_point_offset =
@@ -1639,9 +1627,7 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::evaluate_slow(
               for (unsigned int qb = 0, q = 0; q < n_points;
                    ++qb, q += n_lanes_user_interface)
                 for (unsigned int v = 0;
-                     v < (q + n_lanes_user_interface > n_points ?
-                            n_filled_lanes_last_batch :
-                            n_lanes_user_interface);
+                     v < n_lanes_user_interface && q + v < n_points;
                      ++v)
                   ETT::access(values[qb],
                               v,
@@ -1651,9 +1637,7 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::evaluate_slow(
               for (unsigned int qb = 0, q = 0; q < n_points;
                    ++qb, q += n_lanes_user_interface)
                 for (unsigned int v = 0;
-                     v < (q + n_lanes_user_interface > n_points ?
-                            n_filled_lanes_last_batch :
-                            n_lanes_user_interface);
+                     v < n_lanes_user_interface && q + v < n_points;
                      ++v)
                   ETT::access(values[qb],
                               v,
@@ -1676,9 +1660,7 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::evaluate_slow(
               for (unsigned int qb = 0, q = 0; q < n_points;
                    ++qb, q += n_lanes_user_interface)
                 for (unsigned int v = 0;
-                     v < (q + n_lanes_user_interface > n_points ?
-                            n_filled_lanes_last_batch :
-                            n_lanes_user_interface);
+                     v < n_lanes_user_interface && q + v < n_points;
                      ++v)
                   ETT::access(gradients[qb],
                               v,
@@ -1688,9 +1670,7 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::evaluate_slow(
               for (unsigned int qb = 0, q = 0; q < n_points;
                    ++qb, q += n_lanes_user_interface)
                 for (unsigned int v = 0;
-                     v < (q + n_lanes_user_interface > n_points ?
-                            n_filled_lanes_last_batch :
-                            n_lanes_user_interface);
+                     v < n_lanes_user_interface && q + v < n_points;
                      ++v)
                   ETT::access(gradients[qb],
                               v,
@@ -1748,7 +1728,7 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::integrate_fast(
        ++qb, q += n_lanes_internal)
     {
       const bool incomplete_last_batch =
-        (qb == (n_q_points - 1)) && (n_filled_lanes_last_batch > 0);
+        q + n_lanes_user_interface > n_q_points_scalar;
 
       vectorized_value_type                 value = {};
       Tensor<1, dim, vectorized_value_type> gradient;
@@ -1758,6 +1738,8 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::integrate_fast(
           // zero out lanes of incomplete last quadrature point batch
           if (incomplete_last_batch)
             {
+              const unsigned int n_filled_lanes_last_batch =
+                n_q_points_scalar % n_lanes_internal;
               for (unsigned int v = n_filled_lanes_last_batch;
                    v < n_lanes_internal;
                    ++v)
@@ -1772,6 +1754,8 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::integrate_fast(
           // zero out lanes of incomplete last quadrature point batch
           if (incomplete_last_batch)
             {
+              const unsigned int n_filled_lanes_last_batch =
+                n_q_points_scalar % n_lanes_internal;
               for (unsigned int v = n_filled_lanes_last_batch;
                    v < n_lanes_internal;
                    ++v)
@@ -1851,19 +1835,15 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::integrate_slow(
               for (unsigned int qb = 0, q = 0; q < n_points;
                    ++qb, q += n_lanes_user_interface)
                 for (unsigned int v = 0;
-                     v < (q + n_lanes_user_interface > n_points ?
-                            n_filled_lanes_last_batch :
-                            n_lanes_user_interface);
+                     v < n_lanes_user_interface && q + v < n_points;
                      ++v)
                   solution_values[i] += fe_values->shape_value(i, q + v) *
                                         ETT::access(values[qb], v, d);
             else if (nonzero_shape_function_component[i][d])
               for (unsigned int qb = 0, q = 0; q < n_points;
                    ++qb, q += n_lanes_user_interface)
                 for (unsigned int v = 0;
-                     v < (q + n_lanes_user_interface > n_points ?
-                            n_filled_lanes_last_batch :
-                            n_lanes_user_interface);
+                     v < n_lanes_user_interface && q + v < n_points;
                      ++v)
                   solution_values[i] +=
                     fe_values->shape_value_component(i, q + v, d) *
@@ -1882,19 +1862,15 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::integrate_slow(
               for (unsigned int qb = 0, q = 0; q < n_points;
                    ++qb, q += n_lanes_user_interface)
                 for (unsigned int v = 0;
-                     v < (q + n_lanes_user_interface > n_points ?
-                            n_filled_lanes_last_batch :
-                            n_lanes_user_interface);
+                     v < n_lanes_user_interface && q + v < n_points;
                      ++v)
                   solution_values[i] += fe_values->shape_grad(i, q + v) *
                                         ETT::access(gradients[qb], v, d);
             else if (nonzero_shape_function_component[i][d])
               for (unsigned int qb = 0, q = 0; q < n_points;
                    ++qb, q += n_lanes_user_interface)
                 for (unsigned int v = 0;
-                     v < (q + n_lanes_user_interface > n_points ?
-                            n_filled_lanes_last_batch :
-                            n_lanes_user_interface);
+                     v < n_lanes_user_interface && q + v < n_points;
                      ++v)
                   solution_values[i] +=
                     fe_values->shape_grad_component(i, q + v, d) *