Skip to content

Commit

Permalink
Remove n_filled_lanes_last_batch
Browse files: browse the repository at this point in the history
  • Loading branch information
bergbauer committed May 1, 2023
1 parent b325713 commit 9e42a0d
Showing 1 changed file with 16 additions and 40 deletions.
56 changes: 16 additions & 40 deletions include/deal.II/matrix_free/fe_point_evaluation.h
Original file line number Diff line number Diff line change
Expand Up @@ -977,7 +977,7 @@ class FEPointEvaluation
static constexpr std::size_t n_lanes_internal =
internal::VectorizedArrayTrait<VectorizedArrayType>::width;
static constexpr std::size_t stride =
dealii::internal::VectorizedArrayTrait<Number>::stride;
internal::VectorizedArrayTrait<Number>::stride;

/**
* Common setup function for both constructors. Does the setup for both fast
Expand Down Expand Up @@ -1037,11 +1037,6 @@ class FEPointEvaluation
*/
const unsigned int n_q_points_scalar;

/**
* Number of active quadrature points of the last quadrature point batch.
*/
const unsigned int n_filled_lanes_last_batch;

/**
* Pointer to the Mapping object passed to the constructor.
*/
Expand Down Expand Up @@ -1222,7 +1217,6 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::FEPointEvaluation(
const unsigned int first_selected_component)
: n_q_points(numbers::invalid_unsigned_int)
, n_q_points_scalar(numbers::invalid_unsigned_int)
, n_filled_lanes_last_batch(numbers::invalid_unsigned_int)
, mapping(&mapping)
, fe(&fe)
, update_flags(update_flags)
Expand All @@ -1247,7 +1241,6 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::FEPointEvaluation(
const unsigned int first_selected_component)
: n_q_points(numbers::invalid_unsigned_int)
, n_q_points_scalar(numbers::invalid_unsigned_int)
, n_filled_lanes_last_batch(numbers::invalid_unsigned_int)
, mapping(&mapping_info.get_mapping())
, fe(&fe)
, update_flags(mapping_info.get_update_flags())
Expand All @@ -1268,7 +1261,6 @@ FEPointEvaluation<n_components_, dim, spacedim, Number>::FEPointEvaluation(
FEPointEvaluation<n_components_, dim, spacedim, Number> &other) noexcept
: n_q_points(other.n_q_points)
, n_q_points_scalar(other.n_q_points_scalar)
, n_filled_lanes_last_batch(other.n_filled_lanes_last_batch)
, mapping(other.mapping)
, fe(other.fe)
, poly(other.poly)
Expand Down Expand Up @@ -1308,7 +1300,6 @@ FEPointEvaluation<n_components_, dim, spacedim, Number>::FEPointEvaluation(
FEPointEvaluation<n_components_, dim, spacedim, Number> &&other) noexcept
: n_q_points(other.n_q_points)
, n_q_points_scalar(other.n_q_points_scalar)
, n_filled_lanes_last_batch(other.n_filled_lanes_last_batch)
, mapping(other.mapping)
, fe(other.fe)
, poly(other.poly)
Expand Down Expand Up @@ -1492,12 +1483,9 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::do_reinit()
mapping_info->get_n_q_points_unvectorized(current_cell_index,
current_face_number);

const_cast<unsigned int &>(n_filled_lanes_last_batch) =
n_q_points_scalar % n_lanes_user_interface;
const_cast<unsigned int &>(n_q_points) =
n_q_points_scalar / n_lanes_user_interface;
if (n_filled_lanes_last_batch > 0)
++const_cast<unsigned int &>(n_q_points);
n_q_points_scalar / n_lanes_user_interface +
(n_q_points_scalar % n_lanes_user_interface > 0 ? 1 : 0);

// set unit point pointer
const unsigned int unit_point_offset =
Expand Down Expand Up @@ -1639,9 +1627,7 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::evaluate_slow(
for (unsigned int qb = 0, q = 0; q < n_points;
++qb, q += n_lanes_user_interface)
for (unsigned int v = 0;
v < (q + n_lanes_user_interface > n_points ?
n_filled_lanes_last_batch :
n_lanes_user_interface);
v < n_lanes_user_interface && q + v < n_points;
++v)
ETT::access(values[qb],
v,
Expand All @@ -1651,9 +1637,7 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::evaluate_slow(
for (unsigned int qb = 0, q = 0; q < n_points;
++qb, q += n_lanes_user_interface)
for (unsigned int v = 0;
v < (q + n_lanes_user_interface > n_points ?
n_filled_lanes_last_batch :
n_lanes_user_interface);
v < n_lanes_user_interface && q + v < n_points;
++v)
ETT::access(values[qb],
v,
Expand All @@ -1676,9 +1660,7 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::evaluate_slow(
for (unsigned int qb = 0, q = 0; q < n_points;
++qb, q += n_lanes_user_interface)
for (unsigned int v = 0;
v < (q + n_lanes_user_interface > n_points ?
n_filled_lanes_last_batch :
n_lanes_user_interface);
v < n_lanes_user_interface && q + v < n_points;
++v)
ETT::access(gradients[qb],
v,
Expand All @@ -1688,9 +1670,7 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::evaluate_slow(
for (unsigned int qb = 0, q = 0; q < n_points;
++qb, q += n_lanes_user_interface)
for (unsigned int v = 0;
v < (q + n_lanes_user_interface > n_points ?
n_filled_lanes_last_batch :
n_lanes_user_interface);
v < n_lanes_user_interface && q + v < n_points;
++v)
ETT::access(gradients[qb],
v,
Expand Down Expand Up @@ -1748,7 +1728,7 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::integrate_fast(
++qb, q += n_lanes_internal)
{
const bool incomplete_last_batch =
(qb == (n_q_points - 1)) && (n_filled_lanes_last_batch > 0);
q + n_lanes_user_interface > n_q_points_scalar;

vectorized_value_type value = {};
Tensor<1, dim, vectorized_value_type> gradient;
Expand All @@ -1758,6 +1738,8 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::integrate_fast(
// zero out lanes of incomplete last quadrature point batch
if (incomplete_last_batch)
{
const unsigned int n_filled_lanes_last_batch =
n_q_points_scalar % n_lanes_internal;
for (unsigned int v = n_filled_lanes_last_batch;
v < n_lanes_internal;
++v)
Expand All @@ -1772,6 +1754,8 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::integrate_fast(
// zero out lanes of incomplete last quadrature point batch
if (incomplete_last_batch)
{
const unsigned int n_filled_lanes_last_batch =
n_q_points_scalar % n_lanes_internal;
for (unsigned int v = n_filled_lanes_last_batch;
v < n_lanes_internal;
++v)
Expand Down Expand Up @@ -1851,19 +1835,15 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::integrate_slow(
for (unsigned int qb = 0, q = 0; q < n_points;
++qb, q += n_lanes_user_interface)
for (unsigned int v = 0;
v < (q + n_lanes_user_interface > n_points ?
n_filled_lanes_last_batch :
n_lanes_user_interface);
v < n_lanes_user_interface && q + v < n_points;
++v)
solution_values[i] += fe_values->shape_value(i, q + v) *
ETT::access(values[qb], v, d);
else if (nonzero_shape_function_component[i][d])
for (unsigned int qb = 0, q = 0; q < n_points;
++qb, q += n_lanes_user_interface)
for (unsigned int v = 0;
v < (q + n_lanes_user_interface > n_points ?
n_filled_lanes_last_batch :
n_lanes_user_interface);
v < n_lanes_user_interface && q + v < n_points;
++v)
solution_values[i] +=
fe_values->shape_value_component(i, q + v, d) *
Expand All @@ -1882,19 +1862,15 @@ FEPointEvaluation<n_components, dim, spacedim, Number>::integrate_slow(
for (unsigned int qb = 0, q = 0; q < n_points;
++qb, q += n_lanes_user_interface)
for (unsigned int v = 0;
v < (q + n_lanes_user_interface > n_points ?
n_filled_lanes_last_batch :
n_lanes_user_interface);
v < n_lanes_user_interface && q + v < n_points;
++v)
solution_values[i] += fe_values->shape_grad(i, q + v) *
ETT::access(gradients[qb], v, d);
else if (nonzero_shape_function_component[i][d])
for (unsigned int qb = 0, q = 0; q < n_points;
++qb, q += n_lanes_user_interface)
for (unsigned int v = 0;
v < (q + n_lanes_user_interface > n_points ?
n_filled_lanes_last_batch :
n_lanes_user_interface);
v < n_lanes_user_interface && q + v < n_points;
++v)
solution_values[i] +=
fe_values->shape_grad_component(i, q + v, d) *
Expand Down

0 comments on commit 9e42a0d

Please sign in to comment.