Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for FE_Nothin in CUDA matrix-free #14272

Merged
merged 3 commits into from
Sep 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions include/deal.II/grid/filtered_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -1499,9 +1499,9 @@ namespace IteratorFilters
ActiveFEIndexEqualTo::operator()(const Iterator &i) const
{
return only_locally_owned == true ?
(active_fe_indices.find(i->active_fe_index()) !=
active_fe_indices.end() &&
i->is_locally_owned()) :
(i->is_locally_owned() &&
active_fe_indices.find(i->active_fe_index()) !=
active_fe_indices.end()) :
active_fe_indices.find(i->active_fe_index()) !=
active_fe_indices.end();
}
Expand Down
12 changes: 11 additions & 1 deletion include/deal.II/matrix_free/hanging_nodes_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,10 @@ namespace internal
neighbor->level() == cell->level())
continue;

// Ignore if the neighbors are FE_Nothing
if (neighbor->get_fe().n_dofs_per_cell() == 0)
continue;

face |= 1 << direction;
}

Expand All @@ -549,10 +553,16 @@ namespace internal
std::find_if(line_to_cells[line_index].begin(),
line_to_cells[line_index].end(),
[&cell](const auto &edge_neighbor) {
DoFCellAccessor<dim, dim, false> dof_cell(
&edge_neighbor.first->get_triangulation(),
edge_neighbor.first->level(),
edge_neighbor.first->index(),
&cell->get_dof_handler());
return edge_neighbor.first->is_artificial() ==
false &&
edge_neighbor.first->level() <
cell->level();
cell->level() &&
dof_cell.get_fe().n_dofs_per_cell() > 0;
});

if (edge_neighbor == line_to_cells[line_index].end())
Expand Down
258 changes: 258 additions & 0 deletions tests/cuda/matrix_free_matrix_vector_26.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
// ---------------------------------------------------------------------
//
// Copyright (C) 2022 by the deal.II authors
//
// This file is part of the deal.II library.
//
// The deal.II library is free software; you can use it, redistribute
// it, and/or modify it under the terms of the GNU Lesser General
// Public License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// The full text of the license can be found in the file LICENSE.md at
// the top level directory of deal.II.
//
// ---------------------------------------------------------------------

// Same as matrix_free_matrix_vector_25 test but we only perform the operator
// evaluation on the cells that have a fe_index equal to 0

#include <deal.II/base/function.h>
#include <deal.II/base/utilities.h>

#include <deal.II/distributed/tria.h>

#include <deal.II/dofs/dof_handler.h>
#include <deal.II/dofs/dof_tools.h>

#include <deal.II/fe/fe_nothing.h>
#include <deal.II/fe/fe_q.h>
#include <deal.II/fe/fe_values.h>

#include "deal.II/grid/filtered_iterator.h"
#include <deal.II/grid/grid_generator.h>
#include <deal.II/grid/manifold_lib.h>

#include <deal.II/lac/affine_constraints.h>
#include <deal.II/lac/trilinos_sparse_matrix.h>
#include <deal.II/lac/trilinos_sparsity_pattern.h>

#include <deal.II/numerics/vector_tools.h>

#include <iostream>

#include "../tests.h"

#include "matrix_vector_mf.h"



template <int dim, int fe_degree>
void
test()
{
using Number = double;

parallel::distributed::Triangulation<dim> tria(MPI_COMM_WORLD);
GridGenerator::hyper_cube(tria);
tria.refine_global(1);
typename Triangulation<dim>::active_cell_iterator cell = tria.begin_active(),
endc = tria.end();
for (; cell != endc; ++cell)
if (cell->is_locally_owned())
if (cell->center().norm() < 0.2)
cell->set_refine_flag();
tria.execute_coarsening_and_refinement();
if (dim < 3 && fe_degree < 2)
tria.refine_global(2);
else
tria.refine_global(1);
if (tria.begin(tria.n_levels() - 1)->is_locally_owned())
tria.begin(tria.n_levels() - 1)->set_refine_flag();
if (tria.last()->is_locally_owned())
tria.last()->set_refine_flag();
tria.execute_coarsening_and_refinement();
cell = tria.begin_active();
for (unsigned int i = 0; i < 10 - 3 * dim; ++i)
{
cell = tria.begin_active();
unsigned int counter = 0;
for (; cell != endc; ++cell, ++counter)
if (cell->is_locally_owned())
if (counter % (7 - i) == 0)
cell->set_refine_flag();
tria.execute_coarsening_and_refinement();
}

hp::FECollection<dim> fe_collection;
fe_collection.push_back(FE_Q<dim>(fe_degree));
fe_collection.push_back(FE_Nothing<dim>());

DoFHandler<dim> dof(tria);
// Set FE_Index
for (auto &cell_ : dof.active_cell_iterators())
if (cell_->is_locally_owned())
{
if (cell_->center()[0] < 0.5)
cell_->set_active_fe_index(0);
else
cell_->set_active_fe_index(1);
}
dof.distribute_dofs(fe_collection);

IndexSet owned_set = dof.locally_owned_dofs();
IndexSet relevant_set;
DoFTools::extract_locally_relevant_dofs(dof, relevant_set);

AffineConstraints<double> constraints(relevant_set);
DoFTools::make_hanging_node_constraints(dof, constraints);
VectorTools::interpolate_boundary_values(dof,
0,
Functions::ZeroFunction<dim>(),
constraints);
constraints.close();

deallog << "Testing " << dof.get_fe().get_name() << std::endl;

MappingQ<dim> mapping(fe_degree);
CUDAWrappers::MatrixFree<dim, Number> mf_data;
const QGauss<1> quad(fe_degree + 1);
typename CUDAWrappers::MatrixFree<dim, Number>::AdditionalData
additional_data;
additional_data.mapping_update_flags = update_values | update_gradients |
update_JxW_values |
update_quadrature_points;
IteratorFilters::ActiveFEIndexEqualTo filter(0, true);
mf_data.reinit(mapping, dof, constraints, quad, filter, additional_data);

const unsigned int coef_size =
tria.n_locally_owned_active_cells() * std::pow(fe_degree + 1, dim);
MatrixFreeTest<dim,
fe_degree,
Number,
LinearAlgebra::distributed::Vector<Number, MemorySpace::CUDA>>
mf(mf_data, coef_size);
LinearAlgebra::distributed::Vector<Number, MemorySpace::CUDA> in_dev;
LinearAlgebra::distributed::Vector<Number, MemorySpace::CUDA> out_dev;
mf_data.initialize_dof_vector(in_dev);
mf_data.initialize_dof_vector(out_dev);

LinearAlgebra::ReadWriteVector<Number> rw_in(owned_set);
for (unsigned int i = 0; i < in_dev.local_size(); ++i)
{
const unsigned int glob_index = owned_set.nth_index_in_set(i);
if (constraints.is_constrained(glob_index))
continue;
rw_in.local_element(i) = random_value<double>();
}
in_dev.import(rw_in, VectorOperation::insert);
mf.vmult(out_dev, in_dev);

LinearAlgebra::distributed::Vector<Number, MemorySpace::Host> out_host(
owned_set, MPI_COMM_WORLD);
LinearAlgebra::ReadWriteVector<Number> rw_out(owned_set);
rw_out.import(out_dev, VectorOperation::insert);
out_host.import(rw_out, VectorOperation::insert);

// assemble trilinos sparse matrix with
// (\nabla v, \nabla u) + (v, 10 * u) for
// reference
LinearAlgebra::distributed::Vector<Number, MemorySpace::Host> in_host(
owned_set, MPI_COMM_WORLD);
in_host.import(rw_in, VectorOperation::insert);
LinearAlgebra::distributed::Vector<Number, MemorySpace::Host> ref(
owned_set, MPI_COMM_WORLD);
TrilinosWrappers::SparseMatrix sparse_matrix;
{
TrilinosWrappers::SparsityPattern csp(owned_set, MPI_COMM_WORLD);
DoFTools::make_sparsity_pattern(dof,
csp,
constraints,
true,
Utilities::MPI::this_mpi_process(
MPI_COMM_WORLD));
csp.compress();
sparse_matrix.reinit(csp);
}
{
QGauss<dim> quadrature_formula(fe_degree + 1);

FEValues<dim> fe_values(dof.get_fe(),
quadrature_formula,
update_values | update_gradients |
update_JxW_values);

const unsigned int dofs_per_cell = dof.get_fe().dofs_per_cell;
const unsigned int n_q_points = quadrature_formula.size();

FullMatrix<double> cell_matrix(dofs_per_cell, dofs_per_cell);
std::vector<types::global_dof_index> local_dof_indices(dofs_per_cell);

typename DoFHandler<dim>::active_cell_iterator cell = dof.begin_active(),
endc = dof.end();
for (; cell != endc; ++cell)
if (cell->is_locally_owned() && cell->active_fe_index() == 0)
{
cell_matrix = 0;
fe_values.reinit(cell);

for (unsigned int q_point = 0; q_point < n_q_points; ++q_point)
for (unsigned int i = 0; i < dofs_per_cell; ++i)
{
for (unsigned int j = 0; j < dofs_per_cell; ++j)
cell_matrix(i, j) +=
((fe_values.shape_grad(i, q_point) *
fe_values.shape_grad(j, q_point) +
10. * fe_values.shape_value(i, q_point) *
fe_values.shape_value(j, q_point)) *
fe_values.JxW(q_point));
}

cell->get_dof_indices(local_dof_indices);
constraints.distribute_local_to_global(cell_matrix,
local_dof_indices,
sparse_matrix);
}
}
sparse_matrix.compress(VectorOperation::add);

sparse_matrix.vmult(ref, in_host);
out_host -= ref;

const double diff_norm = out_host.linfty_norm();

deallog << "Norm of difference: " << diff_norm << std::endl << std::endl;
}


int
main(int argc, char **argv)
{
Utilities::MPI::MPI_InitFinalize mpi_initialization(
argc, argv, testing_max_num_threads());

unsigned int myid = Utilities::MPI::this_mpi_process(MPI_COMM_WORLD);
deallog.push(Utilities::int_to_string(myid));

init_cuda(true);

if (myid == 0)
{
initlog();
deallog << std::setprecision(4);

deallog.push("2d");
test<2, 1>();
deallog.pop();

deallog.push("3d");
test<3, 1>();
test<3, 2>();
deallog.pop();
}
else
{
test<2, 1>();
test<3, 1>();
test<3, 2>();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

DEAL:0:2d::Testing FE_Q<2>(1)
DEAL:0:2d::Norm of difference: 0
DEAL:0:2d::
DEAL:0:3d::Testing FE_Q<3>(1)
DEAL:0:3d::Norm of difference: 0
DEAL:0:3d::
DEAL:0:3d::Testing FE_Q<3>(2)
DEAL:0:3d::Norm of difference: 0
DEAL:0:3d::
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

DEAL:0:2d::Testing FE_Q<2>(1)
DEAL:0:2d::Norm of difference: 0
DEAL:0:2d::
DEAL:0:3d::Testing FE_Q<3>(1)
DEAL:0:3d::Norm of difference: 0
DEAL:0:3d::
DEAL:0:3d::Testing FE_Q<3>(2)
DEAL:0:3d::Norm of difference: 0
DEAL:0:3d::