Merge pull request #12881 from tamiko/pr_12830

[9.3] Cherry pick #12830
dealii · Oct 26, 2021 · f438b6a · f438b6a
2 parents 24ebbe0 + ee373ef
commit f438b6a
Show file tree

Hide file tree

Showing 3 changed files with 291 additions and 19 deletions.
diff --git a/source/distributed/tria_base.cc b/source/distributed/tria_base.cc
@@ -1441,20 +1441,23 @@ namespace parallel
         }
 
       // Write packed data to file simultaneously.
-      const unsigned int offset_fixed =
+      const MPI_Offset size_header =
         sizes_fixed_cumulative.size() * sizeof(unsigned int);
 
+      // Make sure we do the following computation in 64bit integers to be able
+      // to handle 4GB+ files:
+      const MPI_Offset my_global_file_position =
+        size_header + static_cast<MPI_Offset>(global_first_cell) *
+                        sizes_fixed_cumulative.back();
+
       const char *data = src_data_fixed.data();
 
-      ierr = MPI_File_write_at(
-        fh,
-        offset_fixed +
-          global_first_cell *
-            sizes_fixed_cumulative.back(), // global position in file
-        DEAL_II_MPI_CONST_CAST(data),
-        src_data_fixed.size(), // local buffer
-        MPI_CHAR,
-        MPI_STATUS_IGNORE);
+      ierr = MPI_File_write_at(fh,
+                               my_global_file_position,
+                               DEAL_II_MPI_CONST_CAST(data),
+                               src_data_fixed.size(), // local buffer
+                               MPI_CHAR,
+                               MPI_STATUS_IGNORE);
       AssertThrowMPI(ierr);
 
       ierr = MPI_File_close(&fh);
@@ -1606,17 +1609,21 @@ namespace parallel
       dest_data_fixed.resize(local_num_cells * sizes_fixed_cumulative.back());
 
       // Read packed data from file simultaneously.
-      const unsigned int offset =
+      const MPI_Offset size_header =
         sizes_fixed_cumulative.size() * sizeof(unsigned int);
 
-      ierr = MPI_File_read_at(
-        fh,
-        offset + global_first_cell *
-                   sizes_fixed_cumulative.back(), // global position in file
-        dest_data_fixed.data(),
-        dest_data_fixed.size(), // local buffer
-        MPI_CHAR,
-        MPI_STATUS_IGNORE);
+      // Make sure we do the following computation in 64bit integers to be able
+      // to handle 4GB+ files:
+      const MPI_Offset my_global_file_position =
+        size_header + static_cast<MPI_Offset>(global_first_cell) *
+                        sizes_fixed_cumulative.back();
+
+      ierr = MPI_File_read_at(fh,
+                              my_global_file_position,
+                              dest_data_fixed.data(),
+                              dest_data_fixed.size(), // local buffer
+                              MPI_CHAR,
+                              MPI_STATUS_IGNORE);
       AssertThrowMPI(ierr);
 
       ierr = MPI_File_close(&fh);

diff --git a/tests/distributed_grids/checkpointing_02.cc b/tests/distributed_grids/checkpointing_02.cc
@@ -0,0 +1,246 @@
+/* ---------------------------------------------------------------------
+ *
+ * Copyright (C) 2021 by the deal.II authors
+ *
+ * This file is part of the deal.II library.
+ *
+ * The deal.II library is free software; you can use it, redistribute
+ * it, and/or modify it under the terms of the GNU Lesser General
+ * Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * The full text of the license can be found in the file LICENSE.md at
+ * the top level directory of deal.II.
+ *
+ * ---------------------------------------------------------------------
+ */
+
+// Test (de)serialization of Trilinos vectors with checkpointing files >4GB. The
+// test here is of course simplified to run quickly.
+
+// set to true to run a test that generates a 5GB file:
+const bool big = false;
+
+#include <deal.II/base/conditional_ostream.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/index_set.h>
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/utilities.h>
+
+#include <deal.II/distributed/grid_refinement.h>
+#include <deal.II/distributed/solution_transfer.h>
+#include <deal.II/distributed/tria.h>
+
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_values.h>
+
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+
+#include <deal.II/lac/la_parallel_vector.h>
+#include <deal.II/lac/sparsity_tools.h>
+#include <deal.II/lac/vector.h>
+
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+#include <deal.II/numerics/vector_tools.h>
+
+#include <fstream>
+#include <iostream>
+
+#include "../tests.h"
+
+
+template <int dim>
+class LaplaceProblem
+{
+public:
+  LaplaceProblem();
+
+  void
+  run(unsigned int n_cycles_global, unsigned int n_cycles_adaptive);
+
+private:
+  void
+  setup_system();
+  void
+  refine_grid();
+  void
+  output_results(const unsigned int cycle) const;
+
+  MPI_Comm mpi_communicator;
+
+  parallel::distributed::Triangulation<dim> triangulation;
+
+  FE_Q<dim>       fe;
+  DoFHandler<dim> dof_handler;
+
+  IndexSet locally_owned_dofs;
+  IndexSet locally_relevant_dofs;
+};
+
+template <int dim>
+LaplaceProblem<dim>::LaplaceProblem()
+  : mpi_communicator(MPI_COMM_WORLD)
+  , triangulation(
+      mpi_communicator,
+      typename Triangulation<dim>::MeshSmoothing(
+        Triangulation<dim>::smoothing_on_refinement |
+        Triangulation<dim>::smoothing_on_coarsening),
+      parallel::distributed::Triangulation<dim>::construct_multigrid_hierarchy)
+  , fe(2)
+  , dof_handler(triangulation)
+{}
+
+
+template <int dim>
+void
+LaplaceProblem<dim>::setup_system()
+{
+  dof_handler.distribute_dofs(fe);
+  locally_owned_dofs = dof_handler.locally_owned_dofs();
+  DoFTools::extract_locally_relevant_dofs(dof_handler, locally_relevant_dofs);
+}
+
+template <int dim>
+void
+LaplaceProblem<dim>::refine_grid()
+{
+  // refine into a corner
+  Vector<float> estimated_error_per_cell(triangulation.n_active_cells());
+  for (auto &cell : triangulation.active_cell_iterators())
+    {
+      if (cell->is_locally_owned())
+        estimated_error_per_cell(cell->active_cell_index()) =
+          cell->center().norm() * std::pow(cell->diameter(), 0.5);
+    }
+
+  parallel::distributed::GridRefinement::refine_and_coarsen_fixed_number(
+    triangulation, estimated_error_per_cell, 0.20, 0.0);
+
+  triangulation.execute_coarsening_and_refinement();
+}
+
+
+
+template <int dim>
+void
+LaplaceProblem<dim>::run(unsigned int n_cycles_global,
+                         unsigned int n_cycles_adaptive)
+{
+  using VectorType = TrilinosWrappers::MPI::Vector;
+
+  for (unsigned int cycle = 0; cycle < n_cycles_adaptive; ++cycle)
+    {
+      deallog << "Cycle " << 1 + cycle << " / " << n_cycles_adaptive << ':'
+              << std::endl;
+
+      if (cycle == 0)
+        {
+          GridGenerator::subdivided_hyper_cube(triangulation, 10);
+          triangulation.refine_global(n_cycles_global);
+        }
+      else
+        refine_grid();
+
+      deallog << "n_global_active_cells: "
+              << triangulation.n_global_active_cells()
+              << " n_global_levels: " << triangulation.n_global_levels()
+              << " ghost_owners.size: " << triangulation.ghost_owners().size()
+              << " level_ghost_owners.size: "
+              << triangulation.level_ghost_owners().size() << std::endl;
+
+      setup_system();
+
+      const unsigned int n_vectors = (big) ? 50 : 2;
+      {
+        deallog << "checkpointing..." << std::endl;
+        std::vector<VectorType> vectors(n_vectors);
+        VectorType              x(locally_owned_dofs, mpi_communicator);
+        for (unsigned int i = 0; i < n_vectors; ++i)
+          {
+            vectors[i].reinit(locally_owned_dofs,
+                              locally_relevant_dofs,
+                              mpi_communicator);
+            vectors[i] = x;
+            x.add(1.0);
+          }
+
+        std::vector<const VectorType *> x_system(n_vectors);
+        int                             i = 0;
+        for (auto &v : x_system)
+          {
+            v = &vectors[i];
+            ++i;
+          }
+
+        VectorType y(locally_owned_dofs,
+                     locally_relevant_dofs,
+                     mpi_communicator);
+        y = x;
+
+        // To be sure, use two SolutionTransfer objects, because the second one
+        // will get a large offset
+        parallel::distributed::SolutionTransfer<dim, VectorType> system_trans(
+          dof_handler);
+        parallel::distributed::SolutionTransfer<dim, VectorType> trans2(
+          dof_handler);
+
+        system_trans.prepare_for_serialization(x_system);
+        trans2.prepare_for_serialization(y);
+        triangulation.save("restart.mesh");
+      }
+
+      {
+        deallog << "resume..." << std::endl;
+        std::vector<VectorType> vectors(n_vectors);
+        for (unsigned int i = 0; i < n_vectors; ++i)
+          vectors[i].reinit(locally_owned_dofs, mpi_communicator);
+
+        std::vector<VectorType *> x_system(n_vectors);
+        int                       i = 0;
+        for (auto &v : x_system)
+          {
+            v = &vectors[i];
+            ++i;
+          }
+
+        VectorType y(locally_owned_dofs, mpi_communicator);
+
+        triangulation.coarsen_global(99);
+        triangulation.load("restart.mesh");
+
+        parallel::distributed::SolutionTransfer<dim, VectorType> system_trans(
+          dof_handler);
+        parallel::distributed::SolutionTransfer<dim, VectorType> trans2(
+          dof_handler);
+        system_trans.deserialize(x_system);
+        trans2.deserialize(y);
+
+        for (unsigned int i = 0; i < n_vectors; ++i)
+          deallog << "vec " << i << ": " << vectors[i].linfty_norm()
+                  << std::endl;
+        deallog << "vec y: " << y.linfty_norm() << std::endl;
+      }
+
+      deallog << std::endl;
+    }
+}
+
+
+int
+main(int argc, char *argv[])
+{
+  unsigned int n_cycles_global   = (big) ? 3 : 1;
+  unsigned int n_cycles_adaptive = 1;
+
+  Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, 1);
+  MPILogInitAll                    all;
+
+  LaplaceProblem<3> laplace_problem;
+  laplace_problem.run(n_cycles_global, n_cycles_adaptive);
+}
diff --git a/tests/distributed_grids/checkpointing_02.with_trilinos=true.mpirun=2.output b/tests/distributed_grids/checkpointing_02.with_trilinos=true.mpirun=2.output
@@ -0,0 +1,19 @@
+
+DEAL:0::Cycle 1 / 1:
+DEAL:0::n_global_active_cells: 8000 n_global_levels: 2 ghost_owners.size: 1 level_ghost_owners.size: 1
+DEAL:0::checkpointing...
+DEAL:0::resume...
+DEAL:0::vec 0: 0.00000
+DEAL:0::vec 1: 1.00000
+DEAL:0::vec y: 2.00000
+DEAL:0::
+
+DEAL:1::Cycle 1 / 1:
+DEAL:1::n_global_active_cells: 8000 n_global_levels: 2 ghost_owners.size: 1 level_ghost_owners.size: 1
+DEAL:1::checkpointing...
+DEAL:1::resume...
+DEAL:1::vec 0: 0.00000
+DEAL:1::vec 1: 1.00000
+DEAL:1::vec y: 2.00000
+DEAL:1::
+