Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TensorProductMatrixSymmetricSum: small modifications #15289

Merged
merged 1 commit into from
Jun 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
103 changes: 27 additions & 76 deletions include/deal.II/lac/tensor_product_matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,28 +209,11 @@ class TensorProductMatrixSymmetricSum
* described in the main documentation of TensorProductMatrixSymmetricSum.
* This function is operating on ArrayView to allow checks of
* array bounds with respect to @p dst and @p src.
*
* @warning This function works on an internal temporal array, leading to
* increased memory consumption if many instances of this class are created,
* e.g., a different object on every cell with different underlying
* coefficients each. Furthermore, only one thread runs this function at a
* time (ensured internally with a mutex). If these two limitations are an
* issue, please consider the other version of this function.
*/
void
apply_inverse(const ArrayView<Number> & dst,
const ArrayView<const Number> &src) const;

/**
* Same as above but letting the user provide a user-owned temporary array,
* resolving the two issues described above. This array is resized
* internally to the needed size.
*/
void
apply_inverse(const ArrayView<Number> & dst,
const ArrayView<const Number> &src,
AlignedVector<Number> & tmp) const;

/**
* Return the memory consumption of the allocated memory in this class.
*/
Expand Down Expand Up @@ -418,8 +401,7 @@ class TensorProductMatrixSymmetricSumCollection
void
apply_inverse(const unsigned int index,
const ArrayView<Number> & dst_in,
const ArrayView<const Number> &src_in,
AlignedVector<Number> & tmp_array) const;
const ArrayView<const Number> &src_in) const;

/**
* Return the memory consumption of this class in bytes.
Expand Down Expand Up @@ -819,21 +801,16 @@ namespace internal

template <int n_rows_1d_templated, std::size_t dim, typename Number>
void
apply_inverse(Number * dst,
const Number * src,
AlignedVector<Number> &tmp,
const unsigned int n_rows_1d_non_templated,
apply_inverse(Number * dst,
const Number * src,
const unsigned int n_rows_1d_non_templated,
const std::array<const Number *, dim> &eigenvectors,
const std::array<const Number *, dim> &eigenvalues,
const Number *inverted_eigenvalues = nullptr)
{
const unsigned int n_rows_1d = n_rows_1d_templated == 0 ?
n_rows_1d_non_templated :
n_rows_1d_templated;
const unsigned int n = Utilities::fixed_power<dim>(n_rows_1d);

tmp.resize_fast(n);
Number *t = tmp.begin();

internal::EvaluatorTensorProduct<internal::evaluate_general,
dim,
Expand All @@ -850,23 +827,23 @@ namespace internal
if (dim == 1)
{
const Number *S = eigenvectors[0];
eval.template apply<0, true, false>(S, src, t);
eval.template apply<0, true, false>(S, src, dst);

for (unsigned int i = 0; i < n_rows_1d; ++i)
if (inverted_eigenvalues)
t[i] *= inverted_eigenvalues[i];
dst[i] *= inverted_eigenvalues[i];
else
t[i] /= eigenvalues[0][i];
dst[i] /= eigenvalues[0][i];

eval.template apply<0, false, false>(S, t, dst);
eval.template apply<0, false, false>(S, dst, dst);
}

else if (dim == 2)
{
const Number *S0 = eigenvectors[0];
const Number *S1 = eigenvectors[1];
eval.template apply<0, true, false>(S0, src, t);
eval.template apply<1, true, false>(S1, t, dst);
eval.template apply<0, true, false>(S0, src, dst);
eval.template apply<1, true, false>(S1, dst, dst);

for (unsigned int i1 = 0, c = 0; i1 < n_rows_1d; ++i1)
for (unsigned int i0 = 0; i0 < n_rows_1d; ++i0, ++c)
Expand All @@ -875,31 +852,31 @@ namespace internal
else
dst[c] /= (eigenvalues[1][i1] + eigenvalues[0][i0]);

eval.template apply<0, false, false>(S0, dst, t);
eval.template apply<1, false, false>(S1, t, dst);
eval.template apply<1, false, false>(S1, dst, dst);
eval.template apply<0, false, false>(S0, dst, dst);
}

else if (dim == 3)
{
const Number *S0 = eigenvectors[0];
const Number *S1 = eigenvectors[1];
const Number *S2 = eigenvectors[2];
eval.template apply<0, true, false>(S0, src, t);
eval.template apply<1, true, false>(S1, t, dst);
eval.template apply<2, true, false>(S2, dst, t);
eval.template apply<0, true, false>(S0, src, dst);
eval.template apply<1, true, false>(S1, dst, dst);
eval.template apply<2, true, false>(S2, dst, dst);

for (unsigned int i2 = 0, c = 0; i2 < n_rows_1d; ++i2)
for (unsigned int i1 = 0; i1 < n_rows_1d; ++i1)
for (unsigned int i0 = 0; i0 < n_rows_1d; ++i0, ++c)
if (inverted_eigenvalues)
t[c] *= inverted_eigenvalues[c];
dst[c] *= inverted_eigenvalues[c];
else
t[c] /= (eigenvalues[2][i2] + eigenvalues[1][i1] +
eigenvalues[0][i0]);
dst[c] /= (eigenvalues[2][i2] + eigenvalues[1][i1] +
eigenvalues[0][i0]);

eval.template apply<0, false, false>(S0, t, dst);
eval.template apply<1, false, false>(S1, dst, t);
eval.template apply<2, false, false>(S2, t, dst);
eval.template apply<2, false, false>(S2, dst, dst);
eval.template apply<1, false, false>(S1, dst, dst);
eval.template apply<0, false, false>(S0, dst, dst);
}

else
Expand All @@ -923,7 +900,6 @@ namespace internal
void
select_apply_inverse(Number * dst,
const Number * src,
AlignedVector<Number> & tmp,
const unsigned int n_rows_1d,
const std::array<const Number *, dim> &eigenvectors,
const std::array<const Number *, dim> &eigenvalues,
Expand Down Expand Up @@ -1016,19 +992,6 @@ inline void
TensorProductMatrixSymmetricSum<dim, Number, n_rows_1d>::apply_inverse(
const ArrayView<Number> & dst_view,
const ArrayView<const Number> &src_view) const
{
std::lock_guard<std::mutex> lock(this->mutex);
this->apply_inverse(dst_view, src_view, this->tmp_array);
}



template <int dim, typename Number, int n_rows_1d>
inline void
TensorProductMatrixSymmetricSum<dim, Number, n_rows_1d>::apply_inverse(
const ArrayView<Number> & dst_view,
const ArrayView<const Number> &src_view,
AlignedVector<Number> & tmp_array) const
{
AssertDimension(dst_view.size(), this->n());
AssertDimension(src_view.size(), this->m());
Expand All @@ -1049,10 +1012,10 @@ TensorProductMatrixSymmetricSum<dim, Number, n_rows_1d>::apply_inverse(
if (n_rows_1d != -1)
internal::TensorProductMatrixSymmetricSum::apply_inverse<
n_rows_1d == -1 ? 0 : n_rows_1d>(
dst, src, tmp_array, n_rows_1d_non_templated, eigenvectors, eigenvalues);
dst, src, n_rows_1d_non_templated, eigenvectors, eigenvalues);
else
internal::TensorProductMatrixSymmetricSum::select_apply_inverse<1>(
dst, src, tmp_array, n_rows_1d_non_templated, eigenvectors, eigenvalues);
dst, src, n_rows_1d_non_templated, eigenvectors, eigenvalues);
}


Expand Down Expand Up @@ -1518,8 +1481,7 @@ void
TensorProductMatrixSymmetricSumCollection<dim, Number, n_rows_1d>::
apply_inverse(const unsigned int index,
const ArrayView<Number> & dst_in,
const ArrayView<const Number> &src_in,
AlignedVector<Number> & tmp_array) const
const ArrayView<const Number> &src_in) const
{
Number * dst = dst_in.begin();
const Number *src = src_in.begin();
Expand All @@ -1545,20 +1507,11 @@ TensorProductMatrixSymmetricSumCollection<dim, Number, n_rows_1d>::

if (n_rows_1d != -1)
internal::TensorProductMatrixSymmetricSum::apply_inverse<
n_rows_1d == -1 ? 0 : n_rows_1d>(dst,
src,
tmp_array,
n_rows_1d_non_templated,
eigenvectors,
eigenvalues);
n_rows_1d == -1 ? 0 : n_rows_1d>(
dst, src, n_rows_1d_non_templated, eigenvectors, eigenvalues);
else
internal::TensorProductMatrixSymmetricSum::select_apply_inverse<1>(
dst,
src,
tmp_array,
n_rows_1d_non_templated,
eigenvectors,
eigenvalues);
dst, src, n_rows_1d_non_templated, eigenvectors, eigenvalues);
}
else
{
Expand Down Expand Up @@ -1595,7 +1548,6 @@ TensorProductMatrixSymmetricSumCollection<dim, Number, n_rows_1d>::
internal::TensorProductMatrixSymmetricSum::apply_inverse<
n_rows_1d == -1 ? 0 : n_rows_1d>(dst,
src,
tmp_array,
n_rows_1d_non_templated,
eigenvectors,
{},
Expand All @@ -1604,7 +1556,6 @@ TensorProductMatrixSymmetricSumCollection<dim, Number, n_rows_1d>::
internal::TensorProductMatrixSymmetricSum::select_apply_inverse<1>(
dst,
src,
tmp_array,
n_rows_1d_non_templated,
eigenvectors,
{},
Expand Down
27 changes: 5 additions & 22 deletions include/deal.II/lac/tensor_product_matrix.templates.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,37 +66,20 @@ namespace internal
void
select_apply_inverse(Number * dst,
const Number * src,
AlignedVector<Number> & tmp,
const unsigned int n_rows_1d,
const std::array<const Number *, dim> &eigenvectors,
const std::array<const Number *, dim> &eigenvalues,
const Number *inverted_eigenvalues)
{
if (n_rows_1d_templated == n_rows_1d)
apply_inverse<n_rows_1d_templated>(dst,
src,
tmp,
n_rows_1d,
eigenvectors,
eigenvalues,
inverted_eigenvalues);
apply_inverse<n_rows_1d_templated>(
dst, src, n_rows_1d, eigenvectors, eigenvalues, inverted_eigenvalues);
else if (n_rows_1d_templated < FDM_N_ROWS_MAX)
select_apply_inverse<std::min(n_rows_1d_templated + 1, FDM_N_ROWS_MAX)>(
dst,
src,
tmp,
n_rows_1d,
eigenvectors,
eigenvalues,
inverted_eigenvalues);
dst, src, n_rows_1d, eigenvectors, eigenvalues, inverted_eigenvalues);
else
apply_inverse<0>(dst,
src,
tmp,
n_rows_1d,
eigenvectors,
eigenvalues,
inverted_eigenvalues);
apply_inverse<0>(
dst, src, n_rows_1d, eigenvectors, eigenvalues, inverted_eigenvalues);
}
} // namespace TensorProductMatrixSymmetricSum
} // namespace internal
Expand Down
6 changes: 2 additions & 4 deletions source/lac/tensor_product_matrix.inst.in
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,8 @@ for (deal_II_dimension : DIMENSIONS;

template void select_apply_inverse<1>(
deal_II_scalar_vectorized * dst,
const deal_II_scalar_vectorized * src,
AlignedVector<deal_II_scalar_vectorized> &tmp,
const unsigned int n_rows,
const deal_II_scalar_vectorized *src,
const unsigned int n_rows,
const std::array<const deal_II_scalar_vectorized *, deal_II_dimension>
&eigenvectors,
const std::array<const deal_II_scalar_vectorized *, deal_II_dimension>
Expand All @@ -53,7 +52,6 @@ for (deal_II_dimension : DIMENSIONS; deal_II_scalar : REAL_SCALARS)
template void select_apply_inverse<1>(
deal_II_scalar * dst,
const deal_II_scalar * src,
AlignedVector<deal_II_scalar> & tmp,
const unsigned int n_rows,
const std::array<const deal_II_scalar *, deal_II_dimension> &eigenvectors,
const std::array<const deal_II_scalar *, deal_II_dimension> &eigenvalues,
Expand Down
13 changes: 4 additions & 9 deletions tests/lac/tensor_product_matrix_08.cc
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ do_test_mesh(const Mapping<dim> &mapping, const Triangulation<dim> &tria)

AlignedVector<Number> src(fe.n_dofs_per_cell());
AlignedVector<Number> dst(fe.n_dofs_per_cell());
AlignedVector<Number> tmp;
Table<2, Number> matrix_0(fe.n_dofs_per_cell(), fe.n_dofs_per_cell());
Table<2, Number> matrix_1(fe.n_dofs_per_cell(), fe.n_dofs_per_cell());
Table<2, Number> matrix_2(fe.n_dofs_per_cell(), fe.n_dofs_per_cell());
Expand All @@ -145,29 +144,25 @@ do_test_mesh(const Mapping<dim> &mapping, const Triangulation<dim> &tria)

collection_0.apply_inverse(cell,
make_array_view(dst.begin(), dst.end()),
make_array_view(src.begin(), src.end()),
tmp);
make_array_view(src.begin(), src.end()));
for (unsigned int j = 0; j < fe.n_dofs_per_cell(); ++j)
matrix_0[j][i] = dst[j];

collection_1.apply_inverse(cell,
make_array_view(dst.begin(), dst.end()),
make_array_view(src.begin(), src.end()),
tmp);
make_array_view(src.begin(), src.end()));
for (unsigned int j = 0; j < fe.n_dofs_per_cell(); ++j)
matrix_1[j][i] = dst[j];

collection_2.apply_inverse(cell,
make_array_view(dst.begin(), dst.end()),
make_array_view(src.begin(), src.end()),
tmp);
make_array_view(src.begin(), src.end()));
for (unsigned int j = 0; j < fe.n_dofs_per_cell(); ++j)
matrix_2[j][i] = dst[j];

collection_3.apply_inverse(cell,
make_array_view(dst.begin(), dst.end()),
make_array_view(src.begin(), src.end()),
tmp);
make_array_view(src.begin(), src.end()));
for (unsigned int j = 0; j < fe.n_dofs_per_cell(); ++j)
matrix_3[j][i] = dst[j];
}
Expand Down