Skip to content

Commit

Permalink
Global coarsening: compress weights
Browse files Browse the repository at this point in the history
  • Loading branch information
peterrum committed Dec 20, 2021
1 parent 5cc5007 commit 5aedbc7
Show file tree
Hide file tree
Showing 2 changed files with 244 additions and 42 deletions.
6 changes: 6 additions & 0 deletions include/deal.II/multigrid/mg_transfer_global_coarsening.h
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,12 @@ class MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>
*/
std::vector<unsigned int> level_dof_indices_fine;

/*
*
*/
std::vector<std::array<VectorizedArray<Number>, Utilities::pow(3, dim)>>
masks;

/**
* Number of components.
*/
Expand Down
280 changes: 238 additions & 42 deletions include/deal.II/multigrid/mg_transfer_global_coarsening.templates.h
Original file line number Diff line number Diff line change
Expand Up @@ -1079,6 +1079,110 @@ namespace internal



template <int dim, typename Number>
static void
compress_weights(
MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>
&transfer)
{
const Number *weights = transfer.weights.data();

const auto set = [](auto &mask, const auto &weight) {
if (mask == -1.0 || mask == weight)
{
mask = weight;
return true;
}

return false;
};

std::vector<std::array<Number, Utilities::pow(3, dim)>> masks;

for (const auto &scheme : transfer.schemes)
{
std::cout << scheme.degree_fine << std::endl;
const int loop_length = scheme.degree_fine + 1;
unsigned int degree_to_3[100];
degree_to_3[0] = 0;
for (int i = 1; i < loop_length - 1; ++i)
degree_to_3[i] = 1;
degree_to_3[loop_length - 1] = 2;

for (unsigned int cell = 0; cell < scheme.n_coarse_cells; ++cell)
{
std::vector<std::array<Number, Utilities::pow(3, dim)>> mask(
transfer.n_components);

for (unsigned int c = 0; c < transfer.n_components; ++c)
mask[c].fill(-1.0);

for (unsigned int c = 0; c < transfer.n_components; ++c)
for (int k = 0; k < (dim > 2 ? loop_length : 1); ++k)
for (int j = 0; j < (dim > 1 ? loop_length : 1); ++j)
{
const unsigned int shift =
9 * degree_to_3[k] + 3 * degree_to_3[j];

if (!set(mask[c][shift], weights[0]))
return;


for (int i = 1; i < loop_length - 1; ++i)
if (!set(mask[c][shift + 1], weights[i]))
return;

if (!set(mask[c][shift + 2], weights[loop_length - 1]))
return;

weights += loop_length;
}

if (std::find_if(mask.begin(), mask.end(), [&](const auto &a) {
return a != mask[0];
}) != mask.end())
return;

masks.push_back(mask[0]);
}
}

std::vector<std::array<VectorizedArray<Number>, Utilities::pow(3, dim)>>
masks_vectorized;

const unsigned int n_lanes = VectorizedArray<Number>::size();

unsigned int c = 0;

for (const auto &scheme : transfer.schemes)
{
for (unsigned int cell = 0; cell < scheme.n_coarse_cells;
cell += n_lanes)
{
const unsigned int n_lanes_filled =
(cell + n_lanes > scheme.n_coarse_cells) ?
(scheme.n_coarse_cells - cell) :
n_lanes;

std::array<VectorizedArray<Number>, Utilities::pow(3, dim)> mask;

for (unsigned int v = 0; v < n_lanes_filled; ++v, ++c)
{
for (unsigned int i = 0;
i < Utilities::pow<unsigned int>(3, dim);
++i)
mask[i][v] = masks[c][i];
}

masks_vectorized.push_back(mask);
}
}

transfer.masks = masks_vectorized;
}



public:
template <int dim, typename Number>
static void
Expand Down Expand Up @@ -1592,6 +1696,9 @@ namespace internal
weights_1 += transfer.schemes[1].n_dofs_per_cell_fine;
}
});

if (is_feq)
compress_weights(transfer);
}
}

Expand Down Expand Up @@ -2308,6 +2415,38 @@ namespace MGTransferGlobalCoarseningTools



namespace
{
template <int dim, typename Number>
void
weight_dofs_on_child(const VectorizedArray<Number> *weights,
const unsigned int n_components,
const int loop_length,
VectorizedArray<Number> * data)
{
unsigned int degree_to_3[100];
degree_to_3[0] = 0;
for (int i = 1; i < loop_length - 1; ++i)
degree_to_3[i] = 1;
degree_to_3[loop_length - 1] = 2;
for (unsigned int c = 0; c < n_components; ++c)
for (int k = 0; k < (dim > 2 ? loop_length : 1); ++k)
for (int j = 0; j < (dim > 1 ? loop_length : 1); ++j)
{
const unsigned int shift = 9 * degree_to_3[k] + 3 * degree_to_3[j];
data[0] *= weights[shift];
// loop bound as int avoids compiler warnings in case loop_length
// == 1 (polynomial degree 0)
for (int i = 1; i < loop_length - 1; ++i)
data[i] *= weights[shift + 1];
data[loop_length - 1] *= weights[shift + 2];
data += loop_length;
}
}
} // namespace



template <int dim, typename Number>
void
MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>::
Expand Down Expand Up @@ -2360,39 +2499,59 @@ MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>::
const unsigned int *indices_coarse = level_dof_indices_coarse.data();
const unsigned int *indices_fine = level_dof_indices_fine.data();
const Number * weights = nullptr;
const std::array<VectorizedArray<Number>, Utilities::pow(3, dim)> *masks =
nullptr;

if (fine_element_is_continuous)
weights = this->weights.data();
{
weights = this->weights.data();
masks = this->masks.data();
}

for (const auto &scheme : schemes)
{
// identity -> take short cut and work directly on global vectors
if (scheme.prolongation_matrix.size() == 0 &&
scheme.prolongation_matrix_1d.size() == 0)
{
for (unsigned int cell = 0; cell < scheme.n_coarse_cells; ++cell)
for (unsigned int cell = 0; cell < scheme.n_coarse_cells;
cell += n_lanes)
{
if ((scheme.n_dofs_per_cell_fine != 0) &&
(scheme.n_dofs_per_cell_coarse != 0))
const unsigned int n_lanes_filled =
(cell + n_lanes > scheme.n_coarse_cells) ?
(scheme.n_coarse_cells - cell) :
n_lanes;

// read from source vector
for (unsigned int v = 0; v < n_lanes_filled; ++v)
{
if ((scheme.n_dofs_per_cell_fine != 0) &&
(scheme.n_dofs_per_cell_coarse != 0))
{
if (fine_element_is_continuous)
for (unsigned int i = 0;
i < scheme.n_dofs_per_cell_fine;
++i)
vec_fine_ptr->local_element(indices_fine[i]) +=
read_dof_values(indices_coarse[i], vec_coarse) *
weights[i];
else
for (unsigned int i = 0;
i < scheme.n_dofs_per_cell_fine;
++i)
vec_fine_ptr->local_element(indices_fine[i]) +=
read_dof_values(indices_coarse[i], vec_coarse);
}

indices_fine += scheme.n_dofs_per_cell_fine;
indices_coarse += scheme.n_dofs_per_cell_coarse;

if (fine_element_is_continuous)
for (unsigned int i = 0; i < scheme.n_dofs_per_cell_fine;
++i)
vec_fine_ptr->local_element(indices_fine[i]) +=
read_dof_values(indices_coarse[i], vec_coarse) *
weights[i];
else
for (unsigned int i = 0; i < scheme.n_dofs_per_cell_fine;
++i)
vec_fine_ptr->local_element(indices_fine[i]) +=
read_dof_values(indices_coarse[i], vec_coarse);
weights += scheme.n_dofs_per_cell_fine;
}

indices_fine += scheme.n_dofs_per_cell_fine;
indices_coarse += scheme.n_dofs_per_cell_coarse;

if (fine_element_is_continuous)
weights += scheme.n_dofs_per_cell_fine;
masks += 1;
}

continue;
Expand Down Expand Up @@ -2444,9 +2603,18 @@ MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>::
// ------------------------------ fine -------------------------------

// weight and write into dst vector
if (fine_element_is_continuous && this->masks.size() > 0)
{
weight_dofs_on_child<dim, Number>(masks->data(),
n_components,
scheme.degree_fine + 1,
evaluation_data_fine.begin());
masks += 1;
}

for (unsigned int v = 0; v < n_lanes_filled; ++v)
{
if (fine_element_is_continuous)
if (fine_element_is_continuous && this->masks.size() == 0)
for (unsigned int i = 0; i < scheme.n_dofs_per_cell_fine; ++i)
vec_fine_ptr->local_element(indices_fine[i]) +=
evaluation_data_fine[i][v] * weights[i];
Expand Down Expand Up @@ -2519,43 +2687,62 @@ MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>::
const unsigned int *indices_coarse = level_dof_indices_coarse.data();
const unsigned int *indices_fine = level_dof_indices_fine.data();
const Number * weights = nullptr;
const std::array<VectorizedArray<Number>, Utilities::pow(3, dim)> *masks =
nullptr;

if (fine_element_is_continuous)
weights = this->weights.data();
{
weights = this->weights.data();
masks = this->masks.data();
}

for (const auto &scheme : schemes)
{
// identity -> take short cut and work directly on global vectors
if (scheme.prolongation_matrix.size() == 0 &&
scheme.prolongation_matrix_1d.size() == 0)
{
for (unsigned int cell = 0; cell < scheme.n_coarse_cells; ++cell)
for (unsigned int cell = 0; cell < scheme.n_coarse_cells;
cell += n_lanes)
{
if ((scheme.n_dofs_per_cell_fine != 0) &&
(scheme.n_dofs_per_cell_coarse != 0))
const unsigned int n_lanes_filled =
(cell + n_lanes > scheme.n_coarse_cells) ?
(scheme.n_coarse_cells - cell) :
n_lanes;

for (unsigned int v = 0; v < n_lanes_filled; ++v)
{
if ((scheme.n_dofs_per_cell_fine != 0) &&
(scheme.n_dofs_per_cell_coarse != 0))
{
if (fine_element_is_continuous)
for (unsigned int i = 0;
i < scheme.n_dofs_per_cell_fine;
++i)
distribute_local_to_global(
indices_coarse[i],
vec_fine_ptr->local_element(indices_fine[i]) *
weights[i],
this->vec_coarse);
else
for (unsigned int i = 0;
i < scheme.n_dofs_per_cell_fine;
++i)
distribute_local_to_global(
indices_coarse[i],
vec_fine_ptr->local_element(indices_fine[i]),
this->vec_coarse);
}

indices_fine += scheme.n_dofs_per_cell_fine;
indices_coarse += scheme.n_dofs_per_cell_coarse;

if (fine_element_is_continuous)
for (unsigned int i = 0; i < scheme.n_dofs_per_cell_fine;
++i)
distribute_local_to_global(indices_coarse[i],
vec_fine_ptr->local_element(
indices_fine[i]) *
weights[i],
this->vec_coarse);
else
for (unsigned int i = 0; i < scheme.n_dofs_per_cell_fine;
++i)
distribute_local_to_global(indices_coarse[i],
vec_fine_ptr->local_element(
indices_fine[i]),
this->vec_coarse);
weights += scheme.n_dofs_per_cell_fine;
}

indices_fine += scheme.n_dofs_per_cell_fine;
indices_coarse += scheme.n_dofs_per_cell_coarse;

if (fine_element_is_continuous)
weights += scheme.n_dofs_per_cell_fine;
masks += 1;
}

continue;
Expand Down Expand Up @@ -2583,7 +2770,7 @@ MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>::
// read from source vector and weight
for (unsigned int v = 0; v < n_lanes_filled; ++v)
{
if (fine_element_is_continuous)
if (fine_element_is_continuous && this->masks.size() == 0)
for (unsigned int i = 0; i < scheme.n_dofs_per_cell_fine; ++i)
evaluation_data_fine[i][v] =
vec_fine_ptr->local_element(indices_fine[i]) * weights[i];
Expand All @@ -2598,6 +2785,15 @@ MGTwoLevelTransfer<dim, LinearAlgebra::distributed::Vector<Number>>::
weights += scheme.n_dofs_per_cell_fine;
}

if (fine_element_is_continuous && this->masks.size() > 0)
{
weight_dofs_on_child<dim, Number>(masks->data(),
n_components,
scheme.degree_fine + 1,
evaluation_data_fine.begin());
masks += 1;
}

// ------------------------------ fine -------------------------------
for (int c = n_components - 1; c >= 0; --c)
{
Expand Down

0 comments on commit 5aedbc7

Please sign in to comment.