Skip to content

Commit

Permalink
Don't use tmp vector
Browse files Browse the repository at this point in the history
  • Loading branch information
peterrum committed May 31, 2023
1 parent 2896a7e commit 06186bd
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 111 deletions.
103 changes: 27 additions & 76 deletions include/deal.II/lac/tensor_product_matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,28 +209,11 @@ class TensorProductMatrixSymmetricSum
* described in the main documentation of TensorProductMatrixSymmetricSum.
* This function is operating on ArrayView to allow checks of
* array bounds with respect to @p dst and @p src.
*
* @warning This function works on an internal temporal array, leading to
* increased memory consumption if many instances of this class are created,
* e.g., a different object on every cell with different underlying
* coefficients each. Furthermore, only one thread runs this function at a
* time (ensured internally with a mutex). If these two limitations are an
* issue, please consider the other version of this function.
*/
void
apply_inverse(const ArrayView<Number> & dst,
const ArrayView<const Number> &src) const;

/**
* Same as above but letting the user provide a user-owned temporary array,
* resolving the two issues described above. This array is resized
* internally to the needed size.
*/
void
apply_inverse(const ArrayView<Number> & dst,
const ArrayView<const Number> &src,
AlignedVector<Number> & tmp) const;

/**
* Return the memory consumption of the allocated memory in this class.
*/
Expand Down Expand Up @@ -418,8 +401,7 @@ class TensorProductMatrixSymmetricSumCollection
void
apply_inverse(const unsigned int index,
const ArrayView<Number> & dst_in,
const ArrayView<const Number> &src_in,
AlignedVector<Number> & tmp_array) const;
const ArrayView<const Number> &src_in) const;

/**
* Return the memory consumption of this class in bytes.
Expand Down Expand Up @@ -819,21 +801,16 @@ namespace internal

template <int n_rows_1d_templated, std::size_t dim, typename Number>
void
apply_inverse(Number * dst,
const Number * src,
AlignedVector<Number> &tmp,
const unsigned int n_rows_1d_non_templated,
apply_inverse(Number * dst,
const Number * src,
const unsigned int n_rows_1d_non_templated,
const std::array<const Number *, dim> &eigenvectors,
const std::array<const Number *, dim> &eigenvalues,
const Number *inverted_eigenvalues = nullptr)
{
const unsigned int n_rows_1d = n_rows_1d_templated == 0 ?
n_rows_1d_non_templated :
n_rows_1d_templated;
const unsigned int n = Utilities::fixed_power<dim>(n_rows_1d);

tmp.resize_fast(n);
Number *t = tmp.begin();

internal::EvaluatorTensorProduct<internal::evaluate_general,
dim,
Expand All @@ -850,23 +827,23 @@ namespace internal
if (dim == 1)
{
const Number *S = eigenvectors[0];
eval.template apply<0, true, false>(S, src, t);
eval.template apply<0, true, false>(S, src, dst);

for (unsigned int i = 0; i < n_rows_1d; ++i)
if (inverted_eigenvalues)
t[i] *= inverted_eigenvalues[i];
dst[i] *= inverted_eigenvalues[i];
else
t[i] /= eigenvalues[0][i];
dst[i] /= eigenvalues[0][i];

eval.template apply<0, false, false>(S, t, dst);
eval.template apply<0, false, false>(S, dst, dst);
}

else if (dim == 2)
{
const Number *S0 = eigenvectors[0];
const Number *S1 = eigenvectors[1];
eval.template apply<0, true, false>(S0, src, t);
eval.template apply<1, true, false>(S1, t, dst);
eval.template apply<0, true, false>(S0, src, dst);
eval.template apply<1, true, false>(S1, dst, dst);

for (unsigned int i1 = 0, c = 0; i1 < n_rows_1d; ++i1)
for (unsigned int i0 = 0; i0 < n_rows_1d; ++i0, ++c)
Expand All @@ -875,31 +852,31 @@ namespace internal
else
dst[c] /= (eigenvalues[1][i1] + eigenvalues[0][i0]);

eval.template apply<0, false, false>(S0, dst, t);
eval.template apply<1, false, false>(S1, t, dst);
eval.template apply<1, false, false>(S1, dst, dst);
eval.template apply<0, false, false>(S0, dst, dst);
}

else if (dim == 3)
{
const Number *S0 = eigenvectors[0];
const Number *S1 = eigenvectors[1];
const Number *S2 = eigenvectors[2];
eval.template apply<0, true, false>(S0, src, t);
eval.template apply<1, true, false>(S1, t, dst);
eval.template apply<2, true, false>(S2, dst, t);
eval.template apply<0, true, false>(S0, src, dst);
eval.template apply<1, true, false>(S1, dst, dst);
eval.template apply<2, true, false>(S2, dst, dst);

for (unsigned int i2 = 0, c = 0; i2 < n_rows_1d; ++i2)
for (unsigned int i1 = 0; i1 < n_rows_1d; ++i1)
for (unsigned int i0 = 0; i0 < n_rows_1d; ++i0, ++c)
if (inverted_eigenvalues)
t[c] *= inverted_eigenvalues[c];
dst[c] *= inverted_eigenvalues[c];
else
t[c] /= (eigenvalues[2][i2] + eigenvalues[1][i1] +
eigenvalues[0][i0]);
dst[c] /= (eigenvalues[2][i2] + eigenvalues[1][i1] +
eigenvalues[0][i0]);

eval.template apply<0, false, false>(S0, t, dst);
eval.template apply<1, false, false>(S1, dst, t);
eval.template apply<2, false, false>(S2, t, dst);
eval.template apply<2, false, false>(S2, dst, dst);
eval.template apply<1, false, false>(S1, dst, dst);
eval.template apply<0, false, false>(S0, dst, dst);
}

else
Expand All @@ -923,7 +900,6 @@ namespace internal
void
select_apply_inverse(Number * dst,
const Number * src,
AlignedVector<Number> & tmp,
const unsigned int n_rows_1d,
const std::array<const Number *, dim> &eigenvectors,
const std::array<const Number *, dim> &eigenvalues,
Expand Down Expand Up @@ -1016,19 +992,6 @@ inline void
TensorProductMatrixSymmetricSum<dim, Number, n_rows_1d>::apply_inverse(
const ArrayView<Number> & dst_view,
const ArrayView<const Number> &src_view) const
{
std::lock_guard<std::mutex> lock(this->mutex);
this->apply_inverse(dst_view, src_view, this->tmp_array);
}



template <int dim, typename Number, int n_rows_1d>
inline void
TensorProductMatrixSymmetricSum<dim, Number, n_rows_1d>::apply_inverse(
const ArrayView<Number> & dst_view,
const ArrayView<const Number> &src_view,
AlignedVector<Number> & tmp_array) const
{
AssertDimension(dst_view.size(), this->n());
AssertDimension(src_view.size(), this->m());
Expand All @@ -1049,10 +1012,10 @@ TensorProductMatrixSymmetricSum<dim, Number, n_rows_1d>::apply_inverse(
if (n_rows_1d != -1)
internal::TensorProductMatrixSymmetricSum::apply_inverse<
n_rows_1d == -1 ? 0 : n_rows_1d>(
dst, src, tmp_array, n_rows_1d_non_templated, eigenvectors, eigenvalues);
dst, src, n_rows_1d_non_templated, eigenvectors, eigenvalues);
else
internal::TensorProductMatrixSymmetricSum::select_apply_inverse<1>(
dst, src, tmp_array, n_rows_1d_non_templated, eigenvectors, eigenvalues);
dst, src, n_rows_1d_non_templated, eigenvectors, eigenvalues);
}


Expand Down Expand Up @@ -1518,8 +1481,7 @@ void
TensorProductMatrixSymmetricSumCollection<dim, Number, n_rows_1d>::
apply_inverse(const unsigned int index,
const ArrayView<Number> & dst_in,
const ArrayView<const Number> &src_in,
AlignedVector<Number> & tmp_array) const
const ArrayView<const Number> &src_in) const
{
Number * dst = dst_in.begin();
const Number *src = src_in.begin();
Expand All @@ -1545,20 +1507,11 @@ TensorProductMatrixSymmetricSumCollection<dim, Number, n_rows_1d>::

if (n_rows_1d != -1)
internal::TensorProductMatrixSymmetricSum::apply_inverse<
n_rows_1d == -1 ? 0 : n_rows_1d>(dst,
src,
tmp_array,
n_rows_1d_non_templated,
eigenvectors,
eigenvalues);
n_rows_1d == -1 ? 0 : n_rows_1d>(
dst, src, n_rows_1d_non_templated, eigenvectors, eigenvalues);
else
internal::TensorProductMatrixSymmetricSum::select_apply_inverse<1>(
dst,
src,
tmp_array,
n_rows_1d_non_templated,
eigenvectors,
eigenvalues);
dst, src, n_rows_1d_non_templated, eigenvectors, eigenvalues);
}
else
{
Expand Down Expand Up @@ -1595,7 +1548,6 @@ TensorProductMatrixSymmetricSumCollection<dim, Number, n_rows_1d>::
internal::TensorProductMatrixSymmetricSum::apply_inverse<
n_rows_1d == -1 ? 0 : n_rows_1d>(dst,
src,
tmp_array,
n_rows_1d_non_templated,
eigenvectors,
{},
Expand All @@ -1604,7 +1556,6 @@ TensorProductMatrixSymmetricSumCollection<dim, Number, n_rows_1d>::
internal::TensorProductMatrixSymmetricSum::select_apply_inverse<1>(
dst,
src,
tmp_array,
n_rows_1d_non_templated,
eigenvectors,
{},
Expand Down
27 changes: 5 additions & 22 deletions include/deal.II/lac/tensor_product_matrix.templates.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,37 +66,20 @@ namespace internal
void
select_apply_inverse(Number * dst,
const Number * src,
AlignedVector<Number> & tmp,
const unsigned int n_rows_1d,
const std::array<const Number *, dim> &eigenvectors,
const std::array<const Number *, dim> &eigenvalues,
const Number *inverted_eigenvalues)
{
if (n_rows_1d_templated == n_rows_1d)
apply_inverse<n_rows_1d_templated>(dst,
src,
tmp,
n_rows_1d,
eigenvectors,
eigenvalues,
inverted_eigenvalues);
apply_inverse<n_rows_1d_templated>(
dst, src, n_rows_1d, eigenvectors, eigenvalues, inverted_eigenvalues);
else if (n_rows_1d_templated < FDM_N_ROWS_MAX)
select_apply_inverse<std::min(n_rows_1d_templated + 1, FDM_N_ROWS_MAX)>(
dst,
src,
tmp,
n_rows_1d,
eigenvectors,
eigenvalues,
inverted_eigenvalues);
dst, src, n_rows_1d, eigenvectors, eigenvalues, inverted_eigenvalues);
else
apply_inverse<0>(dst,
src,
tmp,
n_rows_1d,
eigenvectors,
eigenvalues,
inverted_eigenvalues);
apply_inverse<0>(
dst, src, n_rows_1d, eigenvectors, eigenvalues, inverted_eigenvalues);
}
} // namespace TensorProductMatrixSymmetricSum
} // namespace internal
Expand Down
6 changes: 2 additions & 4 deletions source/lac/tensor_product_matrix.inst.in
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,8 @@ for (deal_II_dimension : DIMENSIONS;

template void select_apply_inverse<1>(
deal_II_scalar_vectorized * dst,
const deal_II_scalar_vectorized * src,
AlignedVector<deal_II_scalar_vectorized> &tmp,
const unsigned int n_rows,
const deal_II_scalar_vectorized *src,
const unsigned int n_rows,
const std::array<const deal_II_scalar_vectorized *, deal_II_dimension>
&eigenvectors,
const std::array<const deal_II_scalar_vectorized *, deal_II_dimension>
Expand All @@ -53,7 +52,6 @@ for (deal_II_dimension : DIMENSIONS; deal_II_scalar : REAL_SCALARS)
template void select_apply_inverse<1>(
deal_II_scalar * dst,
const deal_II_scalar * src,
AlignedVector<deal_II_scalar> & tmp,
const unsigned int n_rows,
const std::array<const deal_II_scalar *, deal_II_dimension> &eigenvectors,
const std::array<const deal_II_scalar *, deal_II_dimension> &eigenvalues,
Expand Down
13 changes: 4 additions & 9 deletions tests/lac/tensor_product_matrix_08.cc
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ do_test_mesh(const Mapping<dim> &mapping, const Triangulation<dim> &tria)

AlignedVector<Number> src(fe.n_dofs_per_cell());
AlignedVector<Number> dst(fe.n_dofs_per_cell());
AlignedVector<Number> tmp;
Table<2, Number> matrix_0(fe.n_dofs_per_cell(), fe.n_dofs_per_cell());
Table<2, Number> matrix_1(fe.n_dofs_per_cell(), fe.n_dofs_per_cell());
Table<2, Number> matrix_2(fe.n_dofs_per_cell(), fe.n_dofs_per_cell());
Expand All @@ -145,29 +144,25 @@ do_test_mesh(const Mapping<dim> &mapping, const Triangulation<dim> &tria)

collection_0.apply_inverse(cell,
make_array_view(dst.begin(), dst.end()),
make_array_view(src.begin(), src.end()),
tmp);
make_array_view(src.begin(), src.end()));
for (unsigned int j = 0; j < fe.n_dofs_per_cell(); ++j)
matrix_0[j][i] = dst[j];

collection_1.apply_inverse(cell,
make_array_view(dst.begin(), dst.end()),
make_array_view(src.begin(), src.end()),
tmp);
make_array_view(src.begin(), src.end()));
for (unsigned int j = 0; j < fe.n_dofs_per_cell(); ++j)
matrix_1[j][i] = dst[j];

collection_2.apply_inverse(cell,
make_array_view(dst.begin(), dst.end()),
make_array_view(src.begin(), src.end()),
tmp);
make_array_view(src.begin(), src.end()));
for (unsigned int j = 0; j < fe.n_dofs_per_cell(); ++j)
matrix_2[j][i] = dst[j];

collection_3.apply_inverse(cell,
make_array_view(dst.begin(), dst.end()),
make_array_view(src.begin(), src.end()),
tmp);
make_array_view(src.begin(), src.end()));
for (unsigned int j = 0; j < fe.n_dofs_per_cell(); ++j)
matrix_3[j][i] = dst[j];
}
Expand Down

0 comments on commit 06186bd

Please sign in to comment.