Small modifications to performance/timing_matrix_free_kokkos #15779

Merged: 1 commit, Jul 22, 2023
91 changes: 51 additions & 40 deletions tests/performance/timing_matrix_free_kokkos.cc
@@ -61,10 +61,13 @@ class LaplaceOperator<dim, fe_degree, Number, MemorySpace::Host>
   using VectorType =
     LinearAlgebra::distributed::Vector<Number, MemorySpace::Host>;
 
-  LaplaceOperator(const Mapping<dim> &             mapping,
-                  const DoFHandler<dim> &          dof_handler,
-                  const AffineConstraints<Number> &constraints,
-                  const Quadrature<1> &            quadrature)
+  LaplaceOperator() = default;
+
+  void
+  reinit(const Mapping<dim> &             mapping,
+         const DoFHandler<dim> &          dof_handler,
+         const AffineConstraints<Number> &constraints,
+         const Quadrature<1> &            quadrature)
   {
     typename MatrixFree<dim, Number>::AdditionalData additional_data;
     additional_data.mapping_update_flags = update_gradients;
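
For reference, the usage implied by this change is a two-phase setup: default-construct the operator once, then call reinit() whenever the matrix-free data should be rebuilt. A minimal sketch, assuming the mapping, dof_handler, constraints, and quadrature objects from this test's run() function:

    // Sketch only: default construction followed by (repeatable) reinit().
    LaplaceOperator<dim, degree, Number, MemorySpace::Host> laplace_operator;
    laplace_operator.reinit(mapping, dof_handler, constraints, quadrature);
    // reinit() may be called again later, e.g. after mesh refinement or to
    // time the setup cost, without creating a new operator object.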
@@ -160,16 +163,17 @@ class LaplaceOperator<dim, fe_degree, Number, MemorySpace::Default>
   using VectorType =
     LinearAlgebra::distributed::Vector<Number, MemorySpace::Default>;
 
-  LaplaceOperator(const Mapping<dim> &             mapping,
-                  const DoFHandler<dim> &          dof_handler,
-                  const AffineConstraints<Number> &constraints,
-                  const Quadrature<1> &            quadrature)
+  LaplaceOperator() = default;
+
+  void
+  reinit(const Mapping<dim> &             mapping,
+         const DoFHandler<dim> &          dof_handler,
+         const AffineConstraints<Number> &constraints,
+         const Quadrature<1> &            quadrature)
   {
     typename CUDAWrappers::MatrixFree<dim, Number>::AdditionalData
       additional_data;
-    additional_data.mapping_update_flags =
-      update_JxW_values | update_gradients |
-      update_quadrature_points; // TODO: remove update_quadrature_points
+    additional_data.mapping_update_flags = update_JxW_values | update_gradients;
 
     matrix_free.reinit(
       mapping, dof_handler, constraints, quadrature, additional_data);
@@ -225,7 +229,8 @@ run(const unsigned int n_refinements)
   using Number     = double;
   using VectorType = LinearAlgebra::distributed::Vector<Number, MemorySpace>;
 
-  const unsigned n_repetitions = 100;
+  const unsigned n_repetitions_setup = 10;
+  const unsigned n_repetitions_vmult = 100;
 
   parallel::distributed::Triangulation<dim> tria(comm);
 
@@ -246,17 +251,22 @@ run(const unsigned int n_refinements)
 
   table.add_value("n_dofs", dof_handler.n_dofs());
 
-  AffineConstraints<Number> constraints;
+  AffineConstraints<Number>                         constraints;
+  LaplaceOperator<dim, degree, Number, MemorySpace> laplace_operator;
 
-  std::chrono::time_point<std::chrono::system_clock> temp =
+  std::chrono::time_point<std::chrono::system_clock> now_setup =
     std::chrono::system_clock::now();
-  LaplaceOperator<dim, degree, Number, MemorySpace> laplace_operator(
-    mapping, dof_handler, constraints, quadrature);
 
-  const double dt_setup = std::chrono::duration_cast<std::chrono::nanoseconds>(
-                            std::chrono::system_clock::now() - temp)
-                            .count() /
-                          1e9;
+  for (unsigned int i = 0; i < n_repetitions_setup; ++i)
+    laplace_operator.reinit(mapping, dof_handler, constraints, quadrature);
+
+  double dt_setup = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                      std::chrono::system_clock::now() - now_setup)
+                      .count() /
+                    1e9;
+
+  dt_setup = Utilities::MPI::sum(dt_setup, MPI_COMM_WORLD) /
+             Utilities::MPI::n_mpi_processes(MPI_COMM_WORLD);
 
   VectorType src, dst;
 
@@ -278,29 +288,19 @@ run(const unsigned int n_refinements)
       dst = 0.0;
     }
 
-  double dt_vmult = 0;
-
-  for (unsigned int i = 0; i < n_repetitions; ++i)
-    {
-      MPI_Barrier(MPI_COMM_WORLD);
-
-      std::chrono::time_point<std::chrono::system_clock> temp =
-        std::chrono::system_clock::now();
-
-      laplace_operator.vmult(dst, src);
-
+  MPI_Barrier(MPI_COMM_WORLD);
+  const std::chrono::time_point<std::chrono::system_clock> now_vmult =
+    std::chrono::system_clock::now();
 
-      const double dt = std::chrono::duration_cast<std::chrono::nanoseconds>(
-                          std::chrono::system_clock::now() - temp)
-                          .count() /
-                        1e9;
+  for (unsigned int i = 0; i < n_repetitions_vmult; ++i)
+    laplace_operator.vmult(dst, src);
 
-      dt_vmult += dt;
-    }
+  double dt_vmult = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                      std::chrono::system_clock::now() - now_vmult)
+                      .count() /
+                    1e9;
 
   dt_vmult = Utilities::MPI::sum(dt_vmult, MPI_COMM_WORLD) /
-             Utilities::MPI::n_mpi_processes(MPI_COMM_WORLD) / n_repetitions;
+             Utilities::MPI::n_mpi_processes(MPI_COMM_WORLD);
 
 
   table.add_value("time_setup", dt_setup);
@@ -333,7 +333,18 @@ perform_single_measurement()
 {
   const unsigned int dim       = 3;
   const unsigned int fe_degree = 4;
-  const unsigned int n_refinements = 5;
+  unsigned int n_refinements = 5;
+
+  switch (get_testing_environment())
+    {
+      case TestingEnvironment::light:
+        break;
+      case TestingEnvironment::medium:
+        break;
+      case TestingEnvironment::heavy:
+        n_refinements += 1;
+        break;
+    }
 
   const auto result0 = run<dim, fe_degree, MemorySpace::Host>(n_refinements);
   const auto result1 = run<dim, fe_degree, MemorySpace::Default>(n_refinements);
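
Since only the heavy branch modifies anything, the switch is equivalent to a single conditional; a compact sketch of the same logic, assuming get_testing_environment() and TestingEnvironment from the performance-test harness:

    // Sketch: enlarge the problem by one refinement level on heavy testing
    // machines; light and medium environments keep the default of 5.
    if (get_testing_environment() == TestingEnvironment::heavy)
      n_refinements += 1;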