Skip to content

Commit

Permalink
LB GPU communicator (#4919)
Browse files Browse the repository at this point in the history
Description of changes:
- use a more efficient communication scheme for LB fields stored in device memory
  • Loading branch information
kodiakhq[bot] committed May 10, 2024
2 parents 6c332aa + 3ca3f47 commit 4ee4c16
Show file tree
Hide file tree
Showing 12 changed files with 171 additions and 78 deletions.
1 change: 1 addition & 0 deletions samples/lbf.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@


lb_params = {'agrid': 1, 'density': 1, 'kinematic_viscosity': 1, 'tau': 0.01,
'single_precision': False,
'ext_force_density': [0, 0, -1.0 / (box_l**3)]}

if args.gpu:
Expand Down
10 changes: 10 additions & 0 deletions src/walberla_bridge/src/BoundaryHandling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,16 @@

namespace walberla {

/**
* @brief Boundary class optimized for sparse data.
*
* Instead of storing the boundary data on a vector field,
* store individual vectors in a map.
* The global cell is used as key.
*
* Requires a custom communicator:
* @ref walberla::field::communication::BoundaryPackInfo.
*/
template <typename T, typename BoundaryClass> class BoundaryHandling {
private:
/** Flag for domain cells, i.e. all cells. */
Expand Down
58 changes: 38 additions & 20 deletions src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@
#include <stencil/D3Q27.h>
#if defined(__CUDACC__)
#include <gpu/AddGPUFieldToStorage.h>
#include <gpu/communication/GPUPackInfo.h>
#include <gpu/communication/MemcpyPackInfo.h>
#include <gpu/communication/UniformGPUScheme.h>
#endif

#include "../BoundaryHandling.hpp"
Expand Down Expand Up @@ -104,8 +105,11 @@ class LBWalberlaImpl : public LBWalberlaBase {
std::variant<CollisionModelThermalized, CollisionModelLeesEdwards>;

public:
// Type definitions
/** @brief Stencil for collision and streaming operations. */
using Stencil = stencil::D3Q19;
/** @brief Stencil for ghost communication (includes domain corners). */
using StencilFull = stencil::D3Q27;
/** @brief Lattice model (e.g. blockforest). */
using Lattice_T = LatticeWalberla::Lattice_T;

protected:
Expand All @@ -114,14 +118,25 @@ class LBWalberlaImpl : public LBWalberlaBase {
using VectorField = field::GhostLayerField<FT, uint_t{3u}>;
template <class Field>
using PackInfo = field::communication::PackInfo<Field>;
template <class Stencil>
using RegularCommScheme =
blockforest::communication::UniformBufferedScheme<Stencil>;
template <class Stencil>
using BoundaryCommScheme =
blockforest::communication::UniformBufferedScheme<Stencil>;
};

#if defined(__CUDACC__)
template <typename FT> struct FieldTrait<FT, lbmpy::Arch::GPU> {
using PdfField = gpu::GPUField<FT>;
using VectorField = gpu::GPUField<FT>;
template <class Field>
using PackInfo = gpu::communication::GPUPackInfo<Field>;
using PackInfo = gpu::communication::MemcpyPackInfo<Field>;
template <class Stencil>
using RegularCommScheme = gpu::communication::UniformGPUScheme<Stencil>;
template <class Stencil>
using BoundaryCommScheme =
blockforest::communication::UniformBufferedScheme<Stencil>;
};
#endif

Expand Down Expand Up @@ -245,21 +260,26 @@ class LBWalberlaImpl : public LBWalberlaBase {
* a full ghost communication. This is needed to properly update the corners
* of the ghost layer when setting cell velocities or populations.
*/
using FullCommunicator = blockforest::communication::UniformBufferedScheme<
typename stencil::D3Q27>;
using RegularFullCommunicator =
typename FieldTrait<FloatType, Architecture>::template RegularCommScheme<
typename stencil::D3Q27>;
using BoundaryFullCommunicator =
typename FieldTrait<FloatType, Architecture>::template BoundaryCommScheme<
typename stencil::D3Q27>;
/**
* @brief Regular communicator.
* We use the same directions as the stencil during integration.
*/
using PDFStreamingCommunicator =
blockforest::communication::UniformBufferedScheme<Stencil>;
typename FieldTrait<FloatType,
Architecture>::template RegularCommScheme<Stencil>;
template <class Field>
using PackInfo =
typename FieldTrait<FloatType, Architecture>::template PackInfo<Field>;

// communicators
std::shared_ptr<FullCommunicator> m_boundary_communicator;
std::shared_ptr<FullCommunicator> m_pdf_full_communicator;
std::shared_ptr<BoundaryFullCommunicator> m_boundary_communicator;
std::shared_ptr<RegularFullCommunicator> m_pdf_full_communicator;
std::shared_ptr<PDFStreamingCommunicator> m_pdf_streaming_communicator;

// ResetForce sweep + external force handling
Expand Down Expand Up @@ -396,28 +416,26 @@ class LBWalberlaImpl : public LBWalberlaBase {
m_pdf_streaming_communicator =
std::make_shared<PDFStreamingCommunicator>(blocks);
m_pdf_streaming_communicator->addPackInfo(
std::make_shared<PackInfo<PdfField>>(m_pdf_field_id, n_ghost_layers));
std::make_shared<PackInfo<PdfField>>(m_pdf_field_id));
m_pdf_streaming_communicator->addPackInfo(
std::make_shared<PackInfo<VectorField>>(m_last_applied_force_field_id,
n_ghost_layers));
std::make_shared<PackInfo<VectorField>>(m_last_applied_force_field_id));

m_pdf_full_communicator = std::make_shared<FullCommunicator>(blocks);
m_pdf_full_communicator = std::make_shared<RegularFullCommunicator>(blocks);
m_pdf_full_communicator->addPackInfo(
std::make_shared<PackInfo<PdfField>>(m_pdf_field_id, n_ghost_layers));
std::make_shared<PackInfo<PdfField>>(m_pdf_field_id));
m_pdf_full_communicator->addPackInfo(
std::make_shared<PackInfo<VectorField>>(m_last_applied_force_field_id,
n_ghost_layers));
std::make_shared<PackInfo<VectorField>>(m_last_applied_force_field_id));
m_pdf_full_communicator->addPackInfo(
std::make_shared<PackInfo<VectorField>>(m_velocity_field_id,
n_ghost_layers));
std::make_shared<PackInfo<VectorField>>(m_velocity_field_id));

m_boundary_communicator = std::make_shared<FullCommunicator>(blocks);
m_boundary_communicator =
std::make_shared<BoundaryFullCommunicator>(blocks);
m_boundary_communicator->addPackInfo(
std::make_shared<field::communication::PackInfo<FlagField>>(
m_flag_field_id, n_ghost_layers));
m_flag_field_id));
auto boundary_packinfo = std::make_shared<
field::communication::BoundaryPackInfo<FlagField, BoundaryModel>>(
m_flag_field_id, n_ghost_layers);
m_flag_field_id);
boundary_packinfo->setup_boundary_handle(m_lattice, m_boundary);
m_boundary_communicator->addPackInfo(boundary_packinfo);

Expand Down
2 changes: 1 addition & 1 deletion testsuite/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ python_test(FILE thole.py MAX_NUM_PROC 4)
python_test(FILE lb_slice.py MAX_NUM_PROC 2 GPU_SLOTS 1)
python_test(FILE lb_boundary_velocity.py MAX_NUM_PROC 1)
# python_test(FILE lb_boundary_volume_force.py MAX_NUM_PROC 2) # TODO
python_test(FILE lb_boundary_ghost_layer.py MAX_NUM_PROC 2)
python_test(FILE lb_boundary_ghost_layer.py MAX_NUM_PROC 2 GPU_SLOTS 1)
python_test(FILE lb_circular_couette.py MAX_NUM_PROC 2 GPU_SLOTS 1)
python_test(FILE lb_poiseuille.py MAX_NUM_PROC 4 GPU_SLOTS 1)
python_test(FILE lb_poiseuille_cylinder.py MAX_NUM_PROC 2 GPU_SLOTS 1)
Expand Down
20 changes: 18 additions & 2 deletions testsuite/python/lb_boundary_ghost_layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,33 @@ def test_shape_setter(self):

@utx.skipIfMissingFeatures(["WALBERLA"])
@ut.skipIf(TestCommon.n_nodes != 2, "only runs for 2 MPI ranks")
class LBPoiseuilleWalberlaSinglePrecision(TestCommon, ut.TestCase):
class LBPoiseuilleWalberlaSinglePrecisionCPU(TestCommon, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberla
lb_params = {"single_precision": True}


@utx.skipIfMissingFeatures(["WALBERLA"])
@ut.skipIf(TestCommon.n_nodes != 2, "only runs for 2 MPI ranks")
class LBPoiseuilleWalberlaDoublePrecision(TestCommon, ut.TestCase):
class LBPoiseuilleWalberlaDoublePrecisionCPU(TestCommon, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberla
lb_params = {"single_precision": False}


@utx.skipIfMissingGPU()
@utx.skipIfMissingFeatures(["WALBERLA", "CUDA"])
@ut.skipIf(TestCommon.n_nodes != 2, "only runs for 2 MPI ranks")
class LBPoiseuilleWalberlaSinglePrecisionGPU(TestCommon, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberlaGPU
lb_params = {"single_precision": True}


@utx.skipIfMissingGPU()
@utx.skipIfMissingFeatures(["WALBERLA", "CUDA"])
@ut.skipIf(TestCommon.n_nodes != 2, "only runs for 2 MPI ranks")
class LBPoiseuilleWalberlaDoublePrecisionGPU(TestCommon, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberlaGPU
lb_params = {"single_precision": False}


if __name__ == "__main__":
ut.main()
15 changes: 11 additions & 4 deletions testsuite/python/lb_circular_couette.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ def taylor_couette(v1, v2, r1, r2):
return a, b


@utx.skipIfMissingFeatures(["WALBERLA"])
class LBCouetteTest:

system = espressomd.System(box_l=(GRID_SIZE + [1, 1, 0]) * AGRID)
Expand All @@ -61,9 +60,9 @@ def test_taylor_couette_flow(self):
"""

system = self.system
lb_fluid = espressomd.lb.LBFluidWalberla(
lb_fluid = self.lb_class(
agrid=AGRID, density=0.5, kinematic_viscosity=3.2,
tau=system.time_step)
tau=system.time_step, **self.lb_params)
self.system.lb = lb_fluid

# set up two cylinders
Expand Down Expand Up @@ -159,7 +158,15 @@ class LBCircularCouetteWalberlaSinglePrecisionCPU(LBCouetteTest, ut.TestCase):
lb_params = {"single_precision": True}


@utx.skipIfMissingFeatures(["WALBERLA"])
@utx.skipIfMissingGPU()
@utx.skipIfMissingFeatures(["WALBERLA", "CUDA"])
class LBCircularCouetteWalberlaDoublePrecisionGPU(LBCouetteTest, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberlaGPU
lb_params = {"single_precision": False}


@utx.skipIfMissingGPU()
@utx.skipIfMissingFeatures(["WALBERLA", "CUDA"])
class LBCircularCouetteWalberlaSinglePrecisionGPU(LBCouetteTest, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberlaGPU
lb_params = {"single_precision": True}
Expand Down
22 changes: 15 additions & 7 deletions testsuite/python/lb_poiseuille.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,20 +120,28 @@ def test_profile(self):


@utx.skipIfMissingFeatures(["WALBERLA"])
class LBPoiseuilleWalberla(LBPoiseuilleCommon, ut.TestCase):

"""Test for the Walberla implementation of the LB in double-precision."""

class LBPoiseuilleWalberlaDoublePrecisionCPU(LBPoiseuilleCommon, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberla
lb_params = {"single_precision": False}


@utx.skipIfMissingFeatures(["WALBERLA"])
class LBPoiseuilleWalberlaSinglePrecision(LBPoiseuilleCommon, ut.TestCase):
class LBPoiseuilleWalberlaSinglePrecisionCPU(LBPoiseuilleCommon, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberla
lb_params = {"single_precision": True}

"""Test for the Walberla implementation of the LB in single-precision."""

lb_class = espressomd.lb.LBFluidWalberla
@utx.skipIfMissingGPU()
@utx.skipIfMissingFeatures(["WALBERLA", "CUDA"])
class LBPoiseuilleWalberlaDoublePrecisionGPU(LBPoiseuilleCommon, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberlaGPU
lb_params = {"single_precision": False}


@utx.skipIfMissingGPU()
@utx.skipIfMissingFeatures(["WALBERLA", "CUDA"])
class LBPoiseuilleWalberlaSinglePrecisionGPU(LBPoiseuilleCommon, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberlaGPU
lb_params = {"single_precision": True}


Expand Down
22 changes: 15 additions & 7 deletions testsuite/python/lb_poiseuille_cylinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,20 +197,28 @@ def test_z(self):


@utx.skipIfMissingFeatures(["WALBERLA"])
class LBPoiseuilleWalberla(LBPoiseuilleCommon, ut.TestCase):

"""Test for the Walberla implementation of the LB in double-precision."""

class LBPoiseuilleWalberlaDoublePrecisionCPU(LBPoiseuilleCommon, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberla
lb_params = {"single_precision": False}


@utx.skipIfMissingFeatures(["WALBERLA"])
class LBPoiseuilleWalberlaSinglePrecision(LBPoiseuilleCommon, ut.TestCase):
class LBPoiseuilleWalberlaSinglePrecisionCPU(LBPoiseuilleCommon, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberla
lb_params = {"single_precision": True}

"""Test for the Walberla implementation of the LB in single-precision."""

lb_class = espressomd.lb.LBFluidWalberla
@utx.skipIfMissingGPU()
@utx.skipIfMissingFeatures(["WALBERLA", "CUDA"])
class LBPoiseuilleWalberlaDoublePrecisionGPU(LBPoiseuilleCommon, ut.TestCase):
    """Run the shared Poiseuille checks on the GPU LB class in double precision."""

    lb_class = espressomd.lb.LBFluidWalberlaGPU
    # Request double precision; with True this case would duplicate the
    # single-precision GPU test and leave double precision untested.
    lb_params = {"single_precision": False}


@utx.skipIfMissingGPU()
@utx.skipIfMissingFeatures(["WALBERLA", "CUDA"])
class LBPoiseuilleWalberlaSinglePrecisionGPU(LBPoiseuilleCommon, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberlaGPU
lb_params = {"single_precision": True}


Expand Down
34 changes: 20 additions & 14 deletions testsuite/python/observable_cylindricalLB.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,8 @@ def setup_system_get_np_hist(self):
self.align_with_observable_frame(pos) +
self.cyl_transform_params.center)
vel_aligned.append(self.align_with_observable_frame(vel))
node_aligned = np.array(
np.rint(
np.array(pos_aligned) -
3 *
[0.5]),
dtype=int)
node_aligned = np.array(np.rint(np.array(pos_aligned) - 3 * [0.5]),
dtype=int)
self.system.part.add(pos=pos_aligned, v=vel_aligned)
self.params['ids'] = self.system.part.all().id

Expand Down Expand Up @@ -277,22 +273,32 @@ def test_cylindrical_lb_flux_density_obs(self):


@utx.skipIfMissingFeatures(["WALBERLA"])
class CylindricalLBObservableWalberla(
class CylindricalLBObservableWalberlaDoubePrecisionCPU(
CylindricalLBObservableCommon, ut.TestCase):

"""Test for the Walberla implementation of the LB in double-precision."""

lb_class = espressomd.lb.LBFluidWalberla
lb_params_extra = {"single_precision": False}


@utx.skipIfMissingFeatures(["WALBERLA"])
class CylindricalLBObservableWalberlaSinglePrecision(
CylindricalLBObservableWalberla, ut.TestCase):
class CylindricalLBObservableWalberlaSinglePrecisionCPU(
CylindricalLBObservableCommon, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberla
lb_params_extra = {"single_precision": True}

"""Test for the Walberla implementation of the LB in single-precision."""

lb_class = espressomd.lb.LBFluidWalberla
@utx.skipIfMissingGPU()
@utx.skipIfMissingFeatures(["WALBERLA", "CUDA"])
class CylindricalLBObservableWalberlaDoubePrecisionGPU(
CylindricalLBObservableCommon, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberlaGPU
lb_params_extra = {"single_precision": False}


@utx.skipIfMissingGPU()
@utx.skipIfMissingFeatures(["WALBERLA", "CUDA"])
class CylindricalLBObservableWalberlaSinglePrecisionGPU(
CylindricalLBObservableCommon, ut.TestCase):
lb_class = espressomd.lb.LBFluidWalberlaGPU
lb_params_extra = {"single_precision": True}


Expand Down

0 comments on commit 4ee4c16

Please sign in to comment.