MGTwoLevelTransfer: improve setup of weights #16322

Merged: 1 commit, Dec 7, 2023
181 changes: 74 additions & 107 deletions include/deal.II/multigrid/mg_transfer_global_coarsening.templates.h
@@ -1618,97 +1618,98 @@ namespace internal
 
   class MGTwoLevelTransferImplementation
   {
     /**
      * Compute weights.
      */
     template <int dim, typename Number>
     static void
-    compute_weights(
+    setup_weights(
       const dealii::AffineConstraints<Number> &constraints_fine,
-      const MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>
-        &transfer,
-      LinearAlgebra::distributed::Vector<Number> &touch_count)
+      MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>
+        &transfer,
+      bool is_feq)
     {
-      touch_count.reinit(transfer.partitioner_fine);
+      if (transfer.fine_element_is_continuous == false)
+        return; // nothing to do
 
-      for (const auto i : transfer.constraint_info_fine.dof_indices)
-        touch_count.local_element(i) += 1;
-      touch_count.compress(VectorOperation::add);
+      // 1) compute weights globally
+      LinearAlgebra::distributed::Vector<Number> weight_vector;
 
-      for (unsigned int i = 0; i < touch_count.locally_owned_size(); ++i)
-        touch_count.local_element(i) =
-          constraints_fine.is_constrained(
-            touch_count.get_partitioner()->local_to_global(i)) ?
-            Number(0.) :
-            Number(1.) / touch_count.local_element(i);
+      weight_vector.reinit(transfer.partitioner_fine);
 
-      touch_count.update_ghost_values();
-    }
+      // ... compute valence of DoFs
+      for (const auto i : transfer.constraint_info_fine.dof_indices)
+        weight_vector.local_element(i) += 1;
+      weight_vector.compress(VectorOperation::add);
 
-    /**
-     * Try to compress weights to Utilities::pow(3, dim) doubles.
-     * Weights of a cell can be compressed if all components have the
-     * same weights.
-     */
-    template <int dim, typename Number>
-    static void
-    compress_weights(
-      MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>
-        &transfer)
-    {
-      unsigned int n_cells = 0;
-      for (const auto &scheme : transfer.schemes)
-        n_cells += scheme.n_coarse_cells;
+      // ... invert valence
+      for (unsigned int i = 0; i < weight_vector.locally_owned_size(); ++i)
+        weight_vector.local_element(i) =
+          Number(1.) / weight_vector.local_element(i);
 
-      std::vector<std::array<Number, Utilities::pow(3, dim)>> masks(n_cells);
+      // ... clear constrained indices
+      for (const auto &i : constraints_fine.get_lines())
+        if (weight_vector.locally_owned_elements().is_element(i.index))
+          weight_vector[i.index] = 0.0;
 
-      const Number *weight_ptr = transfer.weights.data();
-      std::array<Number, Utilities::pow(3, dim)> *mask_ptr = masks.data();
+      weight_vector.update_ghost_values();
 
-      // (try to) compress weights for each cell
-      for (const auto &scheme : transfer.schemes)
-        for (unsigned int cell = 0; cell < scheme.n_coarse_cells;
-             ++cell, weight_ptr += scheme.n_dofs_per_cell_fine, ++mask_ptr)
-          if (!compute_weights_fe_q_dofs_by_entity<dim, -1, Number>(
-                weight_ptr,
-                transfer.n_components,
-                scheme.degree_fine + 1,
-                mask_ptr->data()))
-            return;
+      // 2) store data cell-wise a DG format and try to compress
+      transfer.weights.resize(transfer.constraint_info_fine.dof_indices.size());
 
-      // vectorize weights
-      AlignedVector<VectorizedArray<Number>> masks_vectorized;
-
-      const unsigned int n_lanes = VectorizedArray<Number>::size();
-
-      unsigned int c = 0;
+      const unsigned int n_lanes = VectorizedArray<Number>::size();
+      unsigned int       offset  = 0;
+      std::array<VectorizedArray<Number>, Utilities::pow(3, dim)>
+                                                 mask_vectorized;
+      std::array<Number, Utilities::pow(3, dim)> mask;
 
+      // ... loop over cells
       for (const auto &scheme : transfer.schemes)
         {
           for (unsigned int cell = 0; cell < scheme.n_coarse_cells;
               cell += n_lanes)
            {
              const unsigned int n_lanes_filled =
                (cell + n_lanes > scheme.n_coarse_cells) ?
                  (scheme.n_coarse_cells - cell) :
                  n_lanes;
 
-              std::array<VectorizedArray<Number>, Utilities::pow(3, dim)> mask;
-              mask.fill(VectorizedArray<Number>());
+              if (is_feq)
+                mask_vectorized.fill(VectorizedArray<Number>());
 
-              for (unsigned int v = 0; v < n_lanes_filled; ++v, ++c)
+              for (unsigned int v = 0; v < n_lanes_filled;
+                   ++v, offset += scheme.n_dofs_per_cell_fine)
                 {
-                  for (unsigned int i = 0;
-                       i < Utilities::pow<unsigned int>(3, dim);
-                       ++i)
-                    mask[i][v] = masks[c][i];
+                  // ... store data cell-wise a DG format
+                  for (unsigned int i = 0; i < scheme.n_dofs_per_cell_fine; ++i)
+                    transfer.weights[offset + i] = weight_vector.local_element(
+                      transfer.constraint_info_fine.dof_indices[offset + i]);
+
+                  if (is_feq)
+                    {
+                      // ... try to compress
+                      is_feq =
+                        compute_weights_fe_q_dofs_by_entity<dim, -1, Number>(
+                          transfer.weights.data() + offset,
+                          transfer.n_components,
+                          scheme.degree_fine + 1,
+                          mask.data());
+
+                      // ... vectorize data
+                      for (unsigned int j = 0; j < mask_vectorized.size(); ++j)
+                        mask_vectorized[j][v] = mask[j];
+                    }
                 }
 
-              masks_vectorized.insert_back(mask.begin(), mask.end());
+              if (is_feq)
+                transfer.weights_compressed.insert_back(mask_vectorized.begin(),
+                                                        mask_vectorized.end());
            }
        }
 
-      // copy result
-      transfer.weights_compressed = masks_vectorized;
+      // 3) clean up
+      if (is_feq)
+        transfer.weights.clear();
+      else
+        transfer.weights_compressed.clear();
     }
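
For readers unfamiliar with the weighting scheme: with a continuous fine element, prolongated values are accumulated cell by cell, so a DoF shared by several cells would otherwise be counted multiple times. The weight vector therefore holds the inverse of each DoF's valence (the number of adjacent cells contributing to it) and zero for constrained DoFs. Below is a minimal standalone sketch of that idea on plain containers; the cell-to-DoF lists and the helper name are hypothetical and not part of the deal.II API.

#include <unordered_set>
#include <vector>

// Hypothetical inputs: cell_dofs[c] lists the global DoF indices of cell c,
// 'constrained' holds DoF indices fixed by hanging-node/boundary constraints.
std::vector<double>
compute_dof_weights(const std::vector<std::vector<unsigned int>> &cell_dofs,
                    const std::unordered_set<unsigned int>       &constrained,
                    const unsigned int                            n_dofs)
{
  std::vector<double> weights(n_dofs, 0.0);

  // valence: how many cells touch each DoF
  for (const auto &dofs : cell_dofs)
    for (const unsigned int i : dofs)
      weights[i] += 1.0;

  // invert the valence; constrained (or untouched) DoFs get weight zero
  for (unsigned int i = 0; i < n_dofs; ++i)
    weights[i] = (constrained.count(i) > 0 || weights[i] == 0.0) ?
                   0.0 :
                   1.0 / weights[i];

  return weights;
}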


@@ -2238,24 +2239,7 @@ namespace internal
 
 
       // ------------------------------- weights -------------------------------
-      if (transfer.fine_element_is_continuous)
-        {
-          // compute weights globally
-          LinearAlgebra::distributed::Vector<Number> weight_vector;
-          compute_weights(constraints_fine, transfer, weight_vector);
-
-          // ... and store them cell-wise a DG format
-          transfer.weights.resize(n_dof_indices_fine.back());
-
-          for (unsigned int i = 0;
-               i < transfer.constraint_info_fine.dof_indices.size();
-               ++i)
-            transfer.weights[i] = weight_vector.local_element(
-              transfer.constraint_info_fine.dof_indices[i]);
-
-          if (is_feq)
-            compress_weights(transfer);
-        }
+      setup_weights(constraints_fine, transfer, is_feq);
     }


@@ -2719,24 +2703,7 @@ namespace internal
        }
 
       // ------------------------------- weights -------------------------------
-      if (transfer.fine_element_is_continuous)
-        {
-          // compute weights globally
-          LinearAlgebra::distributed::Vector<Number> weight_vector;
-          compute_weights(constraints_fine, transfer, weight_vector);
-
-          // ... and store them cell-wise a DG format
-          transfer.weights.resize(n_dof_indices_fine.back());
-
-          for (unsigned int i = 0;
-               i < transfer.constraint_info_fine.dof_indices.size();
-               ++i)
-            transfer.weights[i] = weight_vector.local_element(
-              transfer.constraint_info_fine.dof_indices[i]);
-
-          if (is_feq)
-            compress_weights(transfer);
-        }
+      setup_weights(constraints_fine, transfer, is_feq);
     }
   };
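
The is_feq flag passed to setup_weights() at both call sites controls the compression path: for tensor-product FE_Q-like elements the valence of a DoF depends only on whether its 1D index in each coordinate direction lies on the first point, in the interior, or on the last point of the cell (and, for vector-valued elements, all components must share the same weights), so the (degree + 1)^dim weights of a cell can collapse to 3^dim values. The following rough sketch illustrates such a compressibility check for a single 2D cell; it is a hypothetical stand-in for, not a copy of, the library's compute_weights_fe_q_dofs_by_entity().

#include <array>
#include <cmath>
#include <vector>

// Hypothetical check for one 2D cell: 'weights' holds (p+1)*(p+1) entries in
// lexicographic order; on success, 'mask' receives the 3*3 compressed values.
bool
try_compress_cell_weights_2d(const std::vector<double> &weights,
                             const unsigned int         p,
                             std::array<double, 9>     &mask)
{
  const auto category = [p](const unsigned int i) -> unsigned int {
    return (i == 0) ? 0 : ((i == p) ? 2 : 1); // first / interior / last point
  };

  std::array<bool, 9> seen{};
  for (unsigned int j = 0; j <= p; ++j)
    for (unsigned int i = 0; i <= p; ++i)
      {
        const unsigned int c = category(j) * 3 + category(i);
        const double       w = weights[j * (p + 1) + i];
        if (!seen[c])
          {
            mask[c] = w;
            seen[c] = true;
          }
        else if (std::abs(mask[c] - w) > 1e-12)
          return false; // weights differ within one entity category
      }
  return true;
}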
