From 504cc72bfc0472afc30a9090eaf98a104e3ddd05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 24 Apr 2025 13:35:58 -0500 Subject: [PATCH 001/154] Reintroduce: higher order Faraday kernel implemented by @vanthieg --- src/engines/srpic.hpp | 36 +++++++++-- src/framework/parameters.cpp | 63 +++++++++++++++++-- src/global/defaults.h | 14 +++++ src/kernels/faraday_mink.hpp | 119 ++++++++++++++++++++++++++++------- 4 files changed, 200 insertions(+), 32 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index a10070c34..8f8215f1e 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -185,6 +185,24 @@ namespace ntt { if constexpr (M::CoordType == Coord::Cart) { // minkowski case const auto dx = math::sqrt(domain.mesh.metric.template h_<1, 1>({})); + const auto deltax = m_params.template get( + "algorithms.fieldsolver.deltax"); + const auto deltay = m_params.template get( + "algorithms.fieldsolver.deltay"); + const auto betaxy = m_params.template get( + "algorithms.fieldsolver.betaxy"); + const auto betayx = m_params.template get( + "algorithms.fieldsolver.betayx"); + const auto deltaz = m_params.template get( + "algorithms.fieldsolver.deltaz"); + const auto betaxz = m_params.template get( + "algorithms.fieldsolver.betaxz"); + const auto betazx = m_params.template get( + "algorithms.fieldsolver.betazx"); + const auto betayz = m_params.template get( + "algorithms.fieldsolver.betayz"); + const auto betazy = m_params.template get( + "algorithms.fieldsolver.betazy"); real_t coeff1, coeff2; if constexpr (M::Dim == Dim::_2D) { coeff1 = dT / SQR(dx); @@ -193,10 +211,20 @@ namespace ntt { coeff1 = dT / dx; coeff2 = ZERO; } - Kokkos::parallel_for( - "Faraday", - domain.mesh.rangeActiveCells(), - kernel::mink::Faraday_kernel(domain.fields.em, coeff1, coeff2)); + Kokkos::parallel_for("Faraday", + domain.mesh.rangeActiveCells(), + kernel::mink::Faraday_kernel(domain.fields.em, + coeff1, + coeff2, + deltax, + deltay, + betaxy, + 
betayx, + deltaz, + betaxz, + betazx, + betayz, + betazy)); } else { Kokkos::parallel_for("Faraday", domain.mesh.rangeActiveCells(), diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index 8a30f52d9..42655b207 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -31,10 +31,10 @@ namespace ntt { template - auto get_dx0_V0( - const std::vector& resolution, - const boundaries_t& extent, - const std::map& params) -> std::pair { + auto get_dx0_V0(const std::vector& resolution, + const boundaries_t& extent, + const std::map& params) + -> std::pair { const auto metric = M(resolution, extent, params); const auto dx0 = metric.dxMin(); coord_t x_corner { ZERO }; @@ -416,6 +416,61 @@ namespace ntt { set("algorithms.toggles.deposit", toml::find_or(toml_data, "algorithms", "toggles", "deposit", true)); + /* [algorithms.fieldsolver] --------------------------------------------- */ + set("algorithms.fieldsolver.deltax", + toml::find_or(toml_data, + "algorithms", + "fieldsolver", + "deltax", + defaults::fieldsolver::deltax)); + set("algorithms.fieldsolver.deltay", + toml::find_or(toml_data, + "algorithms", + "fieldsolver", + "deltay", + defaults::fieldsolver::deltay)); + set("algorithms.fieldsolver.deltaz", + toml::find_or(toml_data, + "algorithms", + "fieldsolver", + "deltaz", + defaults::fieldsolver::deltaz)); + set("algorithms.fieldsolver.betaxy", + toml::find_or(toml_data, + "algorithms", + "fieldsolver", + "betaxy", + defaults::fieldsolver::betaxy)); + set("algorithms.fieldsolver.betayx", + toml::find_or(toml_data, + "algorithms", + "fieldsolver", + "betayx", + defaults::fieldsolver::betayx)); + set("algorithms.fieldsolver.betaxz", + toml::find_or(toml_data, + "algorithms", + "fieldsolver", + "betaxz", + defaults::fieldsolver::betaxz)); + set("algorithms.fieldsolver.betazx", + toml::find_or(toml_data, + "algorithms", + "fieldsolver", + "betazx", + defaults::fieldsolver::betazx)); + set("algorithms.fieldsolver.betayz", + 
toml::find_or(toml_data, + "algorithms", + "fieldsolver", + "betayz", + defaults::fieldsolver::betayz)); + set("algorithms.fieldsolver.betazy", + toml::find_or(toml_data, + "algorithms", + "fieldsolver", + "betazy", + defaults::fieldsolver::betazy)); /* [algorithms.timestep] ------------------------------------------------ */ set("algorithms.timestep.CFL", toml::find_or(toml_data, "algorithms", "timestep", "CFL", defaults::cfl)); diff --git a/src/global/defaults.h b/src/global/defaults.h index e44103ed0..b5120d6e4 100644 --- a/src/global/defaults.h +++ b/src/global/defaults.h @@ -26,6 +26,20 @@ namespace ntt::defaults { const std::string ph_pusher = "Photon"; const timestep_t clear_interval = 100; + namespace fieldsolver { + const real_t deltax = 0.0; + + const real_t deltay = 0.0; + const real_t betaxy = 0.0; + const real_t betayx = 0.0; + + const real_t deltaz = 0.0; + const real_t betaxz = 0.0; + const real_t betazx = 0.0; + const real_t betayz = 0.0; + const real_t betazy = 0.0; + } // namespace fieldsolver + namespace qsph { const real_t r0 = 0.0; const real_t h = 0.0; diff --git a/src/kernels/faraday_mink.hpp b/src/kernels/faraday_mink.hpp index 6d249b999..1112e56e7 100644 --- a/src/kernels/faraday_mink.hpp +++ b/src/kernels/faraday_mink.hpp @@ -26,6 +26,15 @@ namespace kernel::mink { ndfield_t EB; const real_t coeff1; const real_t coeff2; + const real_t deltax; + const real_t deltay; + const real_t betaxy; + const real_t betayx; + const real_t deltaz; + const real_t betaxz; + const real_t betazx; + const real_t betayz; + const real_t betazy; public: /** @@ -33,15 +42,34 @@ namespace kernel::mink { * ! 2D: coeff1 = dt / dx^2, coeff2 = dt * ! 
3D: coeff1 = dt / dx */ - Faraday_kernel(const ndfield_t& EB, real_t coeff1, real_t coeff2) + Faraday_kernel(const ndfield_t& EB, real_t coeff1, real_t coeff2 + , real_t deltax, real_t deltay, real_t betaxy, real_t betayx + , real_t deltaz, real_t betaxz, real_t betazx, real_t betayz + , real_t betazy) : EB { EB } , coeff1 { coeff1 } - , coeff2 { coeff2 } {} + , coeff2 { coeff2 } + , deltax { deltax } + , deltay { deltay } + , betaxy { betaxy } + , betayx { betayx } + , deltaz { deltaz } + , betaxz { betaxz } + , betazx { betazx } + , betayz { betayz } + , betazy { betazy } {} + + Inline void operator()(index_t i1) const { if constexpr (D == Dim::_1D) { - EB(i1, em::bx2) += coeff1 * (EB(i1 + 1, em::ex3) - EB(i1, em::ex3)); - EB(i1, em::bx3) += coeff1 * (EB(i1, em::ex2) - EB(i1 + 1, em::ex2)); + const auto alphax = ONE - THREE * deltax; + EB(i1, em::bx2) += coeff1 * ( + + alphax * (EB(i1 + 1, em::ex3) - EB(i1 , em::ex3)) + + deltax * (EB(i1 + 2, em::ex3) - EB(i1 - 1, em::ex3))); + EB(i1, em::bx3) += coeff1 * ( + - alphax * (EB(i1 + 1, em::ex2) - EB(i1 , em::ex2)) + - deltax * (EB(i1 + 2, em::ex2) - EB(i1 - 1, em::ex2))); } else { raise::KernelError(HERE, "Faraday_kernel: 1D implementation called for D != 1"); } @@ -49,13 +77,27 @@ namespace kernel::mink { Inline void operator()(index_t i1, index_t i2) const { if constexpr (D == Dim::_2D) { - EB(i1, i2, em::bx1) += coeff1 * - (EB(i1, i2, em::ex3) - EB(i1, i2 + 1, em::ex3)); - EB(i1, i2, em::bx2) += coeff1 * - (EB(i1 + 1, i2, em::ex3) - EB(i1, i2, em::ex3)); - EB(i1, i2, em::bx3) += coeff2 * - (EB(i1, i2 + 1, em::ex1) - EB(i1, i2, em::ex1) + - EB(i1, i2, em::ex2) - EB(i1 + 1, i2, em::ex2)); + const auto alphax = ONE - TWO * betaxy - THREE * deltax; + const auto alphay = ONE - TWO * betayx - THREE * deltay; + EB(i1, i2, em::bx1) += coeff1 * ( + - alphay * (EB(i1 , i2 + 1, em::ex3) - EB(i1 , i2 , em::ex3)) + - deltay * (EB(i1 , i2 + 2, em::ex3) - EB(i1 , i2 - 1, em::ex3)) + - betayx * (EB(i1 + 1, i2 + 1, em::ex3) - 
EB(i1 + 1, i2 , em::ex3)) + - betayx * (EB(i1 - 1, i2 + 1, em::ex3) - EB(i1 - 1, i2 , em::ex3))); + EB(i1, i2, em::bx2) += coeff1 * ( + + alphax * (EB(i1 + 1, i2 , em::ex3) - EB(i1 , i2 , em::ex3)) + + deltax * (EB(i1 + 2, i2 , em::ex3) - EB(i1 - 1, i2 , em::ex3)) + + betaxy * (EB(i1 + 1, i2 + 1, em::ex3) - EB(i1 , i2 + 1, em::ex3)) + + betaxy * (EB(i1 + 1, i2 - 1, em::ex3) - EB(i1 , i2 - 1, em::ex3))); + EB(i1, i2, em::bx3) += coeff2 * ( + + alphay * (EB(i1 , i2 + 1, em::ex1) - EB(i1 , i2 , em::ex1)) + + deltay * (EB(i1 , i2 + 2, em::ex1) - EB(i1 , i2 - 1, em::ex1)) + + betayx * (EB(i1 + 1, i2 + 1, em::ex1) - EB(i1 + 1, i2 , em::ex1)) + + betayx * (EB(i1 - 1, i2 + 1, em::ex1) - EB(i1 - 1, i2 , em::ex1)) + - alphax * (EB(i1 + 1, i2 , em::ex2) - EB(i1 , i2 , em::ex2)) + - deltax * (EB(i1 + 2, i2 , em::ex2) - EB(i1 - 1, i2 , em::ex2)) + - betaxy * (EB(i1 + 1, i2 + 1, em::ex2) - EB(i1 , i2 + 1, em::ex2)) + - betaxy * (EB(i1 + 1, i2 - 1, em::ex2) - EB(i1 , i2 - 1, em::ex2))); } else { raise::KernelError(HERE, "Faraday_kernel: 2D implementation called for D != 2"); @@ -64,19 +106,48 @@ namespace kernel::mink { Inline void operator()(index_t i1, index_t i2, index_t i3) const { if constexpr (D == Dim::_3D) { - EB(i1, i2, i3, em::bx1) += coeff1 * (EB(i1, i2, i3 + 1, em::ex2) - - EB(i1, i2, i3, em::ex2) + - EB(i1, i2, i3, em::ex3) - - EB(i1, i2 + 1, i3, em::ex3)); - EB(i1, i2, i3, em::bx2) += coeff1 * (EB(i1 + 1, i2, i3, em::ex3) - - EB(i1, i2, i3, em::ex3) + - EB(i1, i2, i3, em::ex1) - - EB(i1, i2, i3 + 1, em::ex1)); - EB(i1, i2, i3, em::bx3) += coeff1 * (EB(i1, i2 + 1, i3, em::ex1) - - EB(i1, i2, i3, em::ex1) + - EB(i1, i2, i3, em::ex2) - - EB(i1 + 1, i2, i3, em::ex2)); - + const auto alphax = ONE - TWO * betaxy - TWO * betaxz - THREE * deltax; + const auto alphay = ONE - TWO * betayx - TWO * betayz - THREE * deltay; + const auto alphaz = ONE - TWO * betazx - TWO * betazy - THREE * deltaz; + EB(i1, i2, i3, em::bx1) += coeff1 * ( + + alphaz * (EB(i1 , i2 , i3 + 1, em::ex2) 
- EB(i1 , i2 , i3 , em::ex2)) + + deltaz * (EB(i1 , i2 , i3 + 2, em::ex2) - EB(i1 , i2 , i3 - 1, em::ex2)) + + betazx * (EB(i1 + 1, i2 , i3 + 1, em::ex2) - EB(i1 + 1, i2 , i3 , em::ex2)) + + betazx * (EB(i1 - 1, i2 , i3 + 1, em::ex2) - EB(i1 - 1, i2 , i3 , em::ex2)) + + betazy * (EB(i1 , i2 + 1, i3 + 1, em::ex2) - EB(i1 , i2 + 1, i3 , em::ex2)) + + betazy * (EB(i1 , i2 - 1, i3 + 1, em::ex2) - EB(i1 , i2 - 1, i3 , em::ex2)) + - alphay * (EB(i1 , i2 + 1, i3 , em::ex3) - EB(i1 , i2 , i3 , em::ex3)) + - deltay * (EB(i1 , i2 + 2, i3 , em::ex3) - EB(i1 , i2 - 1, i3 , em::ex3)) + - betayx * (EB(i1 + 1, i2 + 1, i3 , em::ex3) - EB(i1 + 1, i2 , i3 , em::ex3)) + - betayx * (EB(i1 - 1, i2 + 1, i3 , em::ex3) - EB(i1 - 1, i2 , i3 , em::ex3)) + - betayz * (EB(i1 , i2 + 1, i3 + 1, em::ex3) - EB(i1 , i2 , i3 + 1, em::ex3)) + - betayz * (EB(i1 , i2 + 1, i3 - 1, em::ex3) - EB(i1 , i2 , i3 - 1, em::ex3))); + EB(i1, i2, i3, em::bx2) += coeff1 * ( + + alphax * (EB(i1 + 1, i2 , i3 , em::ex3) - EB(i1 , i2 , i3 , em::ex3)) + + deltax * (EB(i1 + 2, i2 , i3 , em::ex3) - EB(i1 - 1, i2 , i3 , em::ex3)) + + betaxy * (EB(i1 + 1, i2 + 1, i3 , em::ex3) - EB(i1 , i2 + 1, i3 , em::ex3)) + + betaxy * (EB(i1 + 1, i2 - 1, i3 , em::ex3) - EB(i1 , i2 - 1, i3 , em::ex3)) + + betaxz * (EB(i1 + 1, i2 , i3 + 1, em::ex3) - EB(i1 , i2 , i3 + 1, em::ex3)) + + betaxz * (EB(i1 + 1, i2 , i3 - 1, em::ex3) - EB(i1 , i2 , i3 - 1, em::ex3)) + - alphaz * (EB(i1 , i2 , i3 + 1, em::ex1) - EB(i1 , i2 , i3 , em::ex1)) + - deltaz * (EB(i1 , i2 , i3 + 2, em::ex1) - EB(i1 , i2 , i3 - 1, em::ex1)) + - betazx * (EB(i1 + 1, i2 , i3 + 1, em::ex1) - EB(i1 + 1, i2 , i3 , em::ex1)) + - betazx * (EB(i1 - 1, i2 , i3 + 1, em::ex1) - EB(i1 - 1, i2 , i3 , em::ex1)) + - betazy * (EB(i1 , i2 + 1, i3 + 1, em::ex1) - EB(i1 , i2 + 1, i3 , em::ex1)) + - betazy * (EB(i1 , i2 - 1, i3 + 1, em::ex1) - EB(i1 , i2 - 1, i3 , em::ex1))); + EB(i1, i2, i3, em::bx3) += coeff1 * ( + + alphay * (EB(i1 , i2 + 1, i3 , em::ex1) - EB(i1 , i2 , i3 , em::ex1)) + 
+ deltay * (EB(i1 , i2 + 2, i3 , em::ex1) - EB(i1 , i2 - 1, i3 , em::ex1)) + + betayx * (EB(i1 + 1, i2 + 1, i3 , em::ex1) - EB(i1 + 1, i2 , i3 , em::ex1)) + + betayx * (EB(i1 - 1, i2 + 1, i3 , em::ex1) - EB(i1 - 1, i2 , i3 , em::ex1)) + + betayz * (EB(i1 , i2 + 1, i3 + 1, em::ex1) - EB(i1 , i2 , i3 + 1, em::ex1)) + + betayz * (EB(i1 , i2 + 1, i3 - 1, em::ex1) - EB(i1 , i2 , i3 - 1, em::ex1)) + - alphax * (EB(i1 + 1, i2 , i3 , em::ex2) - EB(i1 , i2 , i3 , em::ex2)) + - deltax * (EB(i1 + 2, i2 , i3 , em::ex2) - EB(i1 - 1, i2 , i3 , em::ex2)) + - betaxy * (EB(i1 + 1, i2 + 1, i3 , em::ex2) - EB(i1 , i2 + 1, i3 , em::ex2)) + - betaxy * (EB(i1 + 1, i2 - 1, i3 , em::ex2) - EB(i1 , i2 - 1, i3 , em::ex2)) + - betaxz * (EB(i1 + 1, i2 , i3 + 1, em::ex2) - EB(i1 , i2 , i3 + 1, em::ex2)) + - betaxz * (EB(i1 + 1, i2 , i3 - 1, em::ex2) - EB(i1 , i2 , i3 - 1, em::ex2))); } else { raise::KernelError(HERE, "Faraday_kernel: 3D implementation called for D != 3"); } From 7d7ce883f924221cf5745e4318d5ec23d17ea101 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 24 Apr 2025 13:36:50 -0500 Subject: [PATCH 002/154] Reintroduce: example settings for higher order field solver --- input.example.toml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/input.example.toml b/input.example.toml index bd5d52b06..960d4b7b2 100644 --- a/input.example.toml +++ b/input.example.toml @@ -253,6 +253,21 @@ # @type: float: > 0 gamma_rad = "" + [algorithms.fieldsolver] + # Yee - all 0.0 - default + # 1D + deltax = -0.065 + # 2D + deltay = -0.065 + betaxy = -0.065 + betayx = -0.065 + # 3D - not yet tested + deltaz = 0.0 + betaxz = 0.0 + betazx = 0.0 + betayz = 0.0 + betazy = 0.0 + [particles] # Fiducial number of particles per cell: # @required From 0fc9c49823c1378ef8d017a8ae419b750c785e7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 24 Apr 2025 15:59:59 -0500 Subject: [PATCH 003/154] improved field interpolation scheme implemented by @vanthieg 
--- src/kernels/particle_pusher_sr.hpp | 399 +++++++++++++++-------------- 1 file changed, 200 insertions(+), 199 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 6bd4e1714..831d070ec 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -833,100 +833,126 @@ namespace kernel::sr { const int i { i1(p) + static_cast(N_GHOSTS) }; const auto dx1_ { static_cast(dx1(p)) }; + // direct interpolation - Arno + int indx = static_cast(dx1_ + HALF); + // first order real_t c0, c1; + real_t ponpmx = ONE - dx1_; + real_t ponppx = dx1_; + + real_t pondmx = static_cast(indx + ONE) - (dx1_ + HALF); + real_t pondpx = ONE - pondmx; + // Ex1 - // interpolate to nodes - c0 = HALF * (EB(i, em::ex1) + EB(i - 1, em::ex1)); - c1 = HALF * (EB(i, em::ex1) + EB(i + 1, em::ex1)); - // interpolate from nodes to the particle position - e0[0] = c0 * (ONE - dx1_) + c1 * dx1_; + // Interpolate --- (dual) + c0 = EB(i - 1 + indx, em::ex1); + c1 = EB(i + indx, em::ex1); + e0[0] = c0 * pondmx + c1 * pondpx; // Ex2 + // Interpolate --- (primal) c0 = EB(i, em::ex2); c1 = EB(i + 1, em::ex2); - e0[1] = c0 * (ONE - dx1_) + c1 * dx1_; + e0[1] = c0 * ponpmx + c1 * ponppx; // Ex3 + // Interpolate --- (primal) c0 = EB(i, em::ex3); c1 = EB(i + 1, em::ex3); - e0[2] = c0 * (ONE - dx1_) + c1 * dx1_; - + e0[2] = c0 * ponpmx + c1 * ponppx; // Bx1 + // Interpolate --- (primal) c0 = EB(i, em::bx1); c1 = EB(i + 1, em::bx1); - b0[0] = c0 * (ONE - dx1_) + c1 * dx1_; + b0[0] = c0 * ponpmx + c1 * ponppx; // Bx2 - c0 = HALF * (EB(i - 1, em::bx2) + EB(i, em::bx2)); - c1 = HALF * (EB(i, em::bx2) + EB(i + 1, em::bx2)); - b0[1] = c0 * (ONE - dx1_) + c1 * dx1_; + // Interpolate --- (dual) + c0 = EB(i - 1 + indx, em::bx2); + c1 = EB(i + indx, em::bx2); + b0[1] = c0 * pondmx + c1 * pondpx; // Bx3 - c0 = HALF * (EB(i - 1, em::bx3) + EB(i, em::bx3)); - c1 = HALF * (EB(i, em::bx3) + EB(i + 1, em::bx3)); - b0[2] = c0 * (ONE - dx1_) + c1 * dx1_; 
+ // Interpolate --- (dual) + c0 = EB(i - 1 + indx, em::bx3); + c1 = EB(i + indx, em::bx3); + b0[2] = c0 * pondmx + c1 * pondpx; } else if constexpr (D == Dim::_2D) { const int i { i1(p) + static_cast(N_GHOSTS) }; const int j { i2(p) + static_cast(N_GHOSTS) }; const auto dx1_ { static_cast(dx1(p)) }; const auto dx2_ { static_cast(dx2(p)) }; + // direct interpolation - Arno + int indx = static_cast(dx1_ + HALF); + int indy = static_cast(dx2_ + HALF); + // first order real_t c000, c100, c010, c110, c00, c10; + real_t ponpmx = ONE - dx1_; + real_t ponppx = dx1_; + real_t ponpmy = ONE - dx2_; + real_t ponppy = dx2_; + + real_t pondmx = static_cast(indx + ONE) - (dx1_ + HALF); + real_t pondpx = ONE - pondmx; + real_t pondmy = static_cast(indy + ONE) - (dx2_ + HALF); + real_t pondpy = ONE - pondmy; + // Ex1 - // interpolate to nodes - c000 = HALF * (EB(i, j, em::ex1) + EB(i - 1, j, em::ex1)); - c100 = HALF * (EB(i, j, em::ex1) + EB(i + 1, j, em::ex1)); - c010 = HALF * (EB(i, j + 1, em::ex1) + EB(i - 1, j + 1, em::ex1)); - c110 = HALF * (EB(i, j + 1, em::ex1) + EB(i + 1, j + 1, em::ex1)); - // interpolate from nodes to the particle position - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - e0[0] = c00 * (ONE - dx2_) + c10 * dx2_; + // Interpolate --- (dual, primal) + c000 = EB(i - 1 + indx, j, em::ex1); + c100 = EB(i + indx, j, em::ex1); + c010 = EB(i - 1 + indx, j + 1, em::ex1); + c110 = EB(i + indx, j + 1, em::ex1); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + e0[0] = c00 * ponpmy + c10 * ponppy; // Ex2 - c000 = HALF * (EB(i, j, em::ex2) + EB(i, j - 1, em::ex2)); - c100 = HALF * (EB(i + 1, j, em::ex2) + EB(i + 1, j - 1, em::ex2)); - c010 = HALF * (EB(i, j, em::ex2) + EB(i, j + 1, em::ex2)); - c110 = HALF * (EB(i + 1, j, em::ex2) + EB(i + 1, j + 1, em::ex2)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - e0[1] = c00 * (ONE - dx2_) + c10 * dx2_; + // 
Interpolate -- (primal, dual) + c000 = EB(i, j - 1 + indy, em::ex2); + c100 = EB(i + 1, j - 1 + indy, em::ex2); + c010 = EB(i, j + indy, em::ex2); + c110 = EB(i + 1, j + indy, em::ex2); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + e0[1] = c00 * pondmy + c10 * pondpy; // Ex3 + // Interpolate -- (primal, primal) c000 = EB(i, j, em::ex3); c100 = EB(i + 1, j, em::ex3); c010 = EB(i, j + 1, em::ex3); c110 = EB(i + 1, j + 1, em::ex3); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - e0[2] = c00 * (ONE - dx2_) + c10 * dx2_; + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + e0[2] = c00 * ponpmy + c10 * ponppy; // Bx1 - c000 = HALF * (EB(i, j, em::bx1) + EB(i, j - 1, em::bx1)); - c100 = HALF * (EB(i + 1, j, em::bx1) + EB(i + 1, j - 1, em::bx1)); - c010 = HALF * (EB(i, j, em::bx1) + EB(i, j + 1, em::bx1)); - c110 = HALF * (EB(i + 1, j, em::bx1) + EB(i + 1, j + 1, em::bx1)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - b0[0] = c00 * (ONE - dx2_) + c10 * dx2_; + // Interpolate -- (primal, dual) + c000 = EB(i, j - 1 + indy, em::bx1); + c100 = EB(i + 1, j - 1 + indy, em::bx1); + c010 = EB(i, j + indy, em::bx1); + c110 = EB(i + 1, j + indy, em::bx1); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + b0[0] = c00 * pondmy + c10 * pondpy; // Bx2 - c000 = HALF * (EB(i - 1, j, em::bx2) + EB(i, j, em::bx2)); - c100 = HALF * (EB(i, j, em::bx2) + EB(i + 1, j, em::bx2)); - c010 = HALF * (EB(i - 1, j + 1, em::bx2) + EB(i, j + 1, em::bx2)); - c110 = HALF * (EB(i, j + 1, em::bx2) + EB(i + 1, j + 1, em::bx2)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - b0[1] = c00 * (ONE - dx2_) + c10 * dx2_; + // Interpolate -- (dual, primal) + c000 = EB(i - 1 + indx, j, em::bx2); + c100 = EB(i + indx, j, em::bx2); + c010 = EB(i - 1 + indx, j + 1, em::bx2); + c110 = EB(i + indx, j + 1, 
em::bx2); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + b0[1] = c00 * ponpmy + c10 * ponppy; // Bx3 - c000 = INV_4 * (EB(i - 1, j - 1, em::bx3) + EB(i - 1, j, em::bx3) + - EB(i, j - 1, em::bx3) + EB(i, j, em::bx3)); - c100 = INV_4 * (EB(i, j - 1, em::bx3) + EB(i, j, em::bx3) + - EB(i + 1, j - 1, em::bx3) + EB(i + 1, j, em::bx3)); - c010 = INV_4 * (EB(i - 1, j, em::bx3) + EB(i - 1, j + 1, em::bx3) + - EB(i, j, em::bx3) + EB(i, j + 1, em::bx3)); - c110 = INV_4 * (EB(i, j, em::bx3) + EB(i, j + 1, em::bx3) + - EB(i + 1, j, em::bx3) + EB(i + 1, j + 1, em::bx3)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - b0[2] = c00 * (ONE - dx2_) + c10 * dx2_; + // Interpolate -- (dual, dual) + c000 = EB(i - 1 + indx, j - 1 + indy, em::bx3); + c100 = EB(i + indx, j - 1 + indy, em::bx3); + c010 = EB(i - 1 + indx, j + indy, em::bx3); + c110 = EB(i + indx, j + indy, em::bx3); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + b0[2] = c00 * pondmy + c10 * pondpy; } else if constexpr (D == Dim::_3D) { const int i { i1(p) + static_cast(N_GHOSTS) }; const int j { i2(p) + static_cast(N_GHOSTS) }; @@ -935,157 +961,132 @@ namespace kernel::sr { const auto dx2_ { static_cast(dx2(p)) }; const auto dx3_ { static_cast(dx3(p)) }; + // direct interpolation - Arno + int indx = static_cast(dx1_ + HALF); + int indy = static_cast(dx2_ + HALF); + int indz = static_cast(dx3_ + HALF); + // first order real_t c000, c100, c010, c110, c001, c101, c011, c111, c00, c10, c01, c11, c0, c1; - // Ex1 - // interpolate to nodes - c000 = HALF * (EB(i, j, k, em::ex1) + EB(i - 1, j, k, em::ex1)); - c100 = HALF * (EB(i, j, k, em::ex1) + EB(i + 1, j, k, em::ex1)); - c010 = HALF * (EB(i, j + 1, k, em::ex1) + EB(i - 1, j + 1, k, em::ex1)); - c110 = HALF * (EB(i, j + 1, k, em::ex1) + EB(i + 1, j + 1, k, em::ex1)); - // interpolate from nodes to the particle position - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 
* (ONE - dx1_) + c110 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - // interpolate to nodes - c001 = HALF * (EB(i, j, k + 1, em::ex1) + EB(i - 1, j, k + 1, em::ex1)); - c101 = HALF * (EB(i, j, k + 1, em::ex1) + EB(i + 1, j, k + 1, em::ex1)); - c011 = HALF * - (EB(i, j + 1, k + 1, em::ex1) + EB(i - 1, j + 1, k + 1, em::ex1)); - c111 = HALF * - (EB(i, j + 1, k + 1, em::ex1) + EB(i + 1, j + 1, k + 1, em::ex1)); - // interpolate from nodes to the particle position - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - e0[0] = c0 * (ONE - dx3_) + c1 * dx3_; + real_t ponpmx = ONE - dx1_; + real_t ponppx = dx1_; + real_t ponpmy = ONE - dx2_; + real_t ponppy = dx2_; + real_t ponpmz = ONE - dx3_; + real_t ponppz = dx3_; - // Ex2 - c000 = HALF * (EB(i, j, k, em::ex2) + EB(i, j - 1, k, em::ex2)); - c100 = HALF * (EB(i + 1, j, k, em::ex2) + EB(i + 1, j - 1, k, em::ex2)); - c010 = HALF * (EB(i, j, k, em::ex2) + EB(i, j + 1, k, em::ex2)); - c110 = HALF * (EB(i + 1, j, k, em::ex2) + EB(i + 1, j + 1, k, em::ex2)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - c001 = HALF * (EB(i, j, k + 1, em::ex2) + EB(i, j - 1, k + 1, em::ex2)); - c101 = HALF * - (EB(i + 1, j, k + 1, em::ex2) + EB(i + 1, j - 1, k + 1, em::ex2)); - c011 = HALF * (EB(i, j, k + 1, em::ex2) + EB(i, j + 1, k + 1, em::ex2)); - c111 = HALF * - (EB(i + 1, j, k + 1, em::ex2) + EB(i + 1, j + 1, k + 1, em::ex2)); - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - e0[1] = c0 * (ONE - dx3_) + c1 * dx3_; + real_t pondmx = static_cast(indx + ONE) - (dx1_ + HALF); + real_t pondpx = ONE - pondmx; + real_t pondmy = static_cast(indy + ONE) - (dx2_ + HALF); + real_t pondpy = ONE - pondmy; + real_t pondmz = static_cast(indz + ONE) - (dx3_ + HALF); + real_t pondpz = ONE - pondmz; + // Ex1 + // 
Interpolate --- (dual, primal, primal) + c000 = EB(i - 1 + indx, j, k, em::ex1); + c100 = EB(i + indx, j, k, em::ex1); + c010 = EB(i - 1 + indx, j + 1, k, em::ex1); + c110 = EB(i + indx, j + 1, k, em::ex1); + c001 = EB(i - 1 + indx, j, k + 1, em::ex1); + c101 = EB(i + indx, j, k + 1, em::ex1); + c011 = EB(i - 1 + indx, j + 1, k + 1, em::ex1); + c111 = EB(i + indx, j + 1, k + 1, em::ex1); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + c0 = c00 * ponpmy + c10 * ponppy; + c01 = c001 * pondmx + c101 * pondpx; + c11 = c011 * pondmx + c111 * pondpx; + c1 = c01 * ponpmy + c11 * ponppy; + e0[0] = c0 * ponpmz + c1 * ponppz; + // Ex2 + // Interpolate -- (primal, dual, primal) + c000 = EB(i, j - 1 + indy, k, em::ex2); + c100 = EB(i + 1, j - 1 + indy, k, em::ex2); + c010 = EB(i, j + indy, k, em::ex2); + c110 = EB(i + 1, j + indy, k, em::ex2); + c001 = EB(i, j - 1 + indy, k + 1, em::ex2); + c101 = EB(i + 1, j - 1 + indy, k + 1, em::ex2); + c011 = EB(i, j + indy, k + 1, em::ex2); + c111 = EB(i + 1, j + indy, k + 1, em::ex2); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + c0 = c00 * pondmy + c10 * pondpy; + c01 = c001 * ponpmx + c101 * ponppx; + c11 = c011 * ponpmx + c111 * ponppx; + c1 = c01 * pondmy + c11 * pondpy; + e0[1] = c0 * ponpmz + c1 * ponppz; // Ex3 - c000 = HALF * (EB(i, j, k, em::ex3) + EB(i, j, k - 1, em::ex3)); - c100 = HALF * (EB(i + 1, j, k, em::ex3) + EB(i + 1, j, k - 1, em::ex3)); - c010 = HALF * (EB(i, j + 1, k, em::ex3) + EB(i, j + 1, k - 1, em::ex3)); - c110 = HALF * - (EB(i + 1, j + 1, k, em::ex3) + EB(i + 1, j + 1, k - 1, em::ex3)); - c001 = HALF * (EB(i, j, k, em::ex3) + EB(i, j, k + 1, em::ex3)); - c101 = HALF * (EB(i + 1, j, k, em::ex3) + EB(i + 1, j, k + 1, em::ex3)); - c011 = HALF * (EB(i, j + 1, k, em::ex3) + EB(i, j + 1, k + 1, em::ex3)); - c111 = HALF * - (EB(i + 1, j + 1, k, em::ex3) + EB(i + 1, j + 1, k + 1, em::ex3)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c01 = c001 * (ONE 
- dx1_) + c101 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - e0[2] = c0 * (ONE - dx3_) + c1 * dx3_; + // Interpolate -- (primal, primal, dual) + c000 = EB(i, j, k - 1 + indz, em::ex3); + c100 = EB(i + 1, j, k - 1 + indz, em::ex3); + c010 = EB(i, j + 1, k - 1 + indz, em::ex3); + c110 = EB(i + 1, j + 1, k - 1 + indz, em::ex3); + c001 = EB(i, j, k + indz, em::ex3); + c101 = EB(i + 1, j, k + indz, em::ex3); + c011 = EB(i, j + 1, k + indz, em::ex3); + c111 = EB(i + 1, j + 1, k + indz, em::ex3); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + c0 = c00 * ponpmy + c10 * ponppy; + c01 = c001 * ponpmx + c101 * ponppx; + c11 = c011 * ponpmx + c111 * ponppx; + c1 = c01 * ponpmy + c11 * ponppy; + e0[2] = c0 * pondmz + c1 * pondpz; // Bx1 - c000 = INV_4 * (EB(i, j, k, em::bx1) + EB(i, j - 1, k, em::bx1) + - EB(i, j, k - 1, em::bx1) + EB(i, j - 1, k - 1, em::bx1)); - c100 = INV_4 * - (EB(i + 1, j, k, em::bx1) + EB(i + 1, j - 1, k, em::bx1) + - EB(i + 1, j, k - 1, em::bx1) + EB(i + 1, j - 1, k - 1, em::bx1)); - c001 = INV_4 * (EB(i, j, k, em::bx1) + EB(i, j, k + 1, em::bx1) + - EB(i, j - 1, k, em::bx1) + EB(i, j - 1, k + 1, em::bx1)); - c101 = INV_4 * - (EB(i + 1, j, k, em::bx1) + EB(i + 1, j, k + 1, em::bx1) + - EB(i + 1, j - 1, k, em::bx1) + EB(i + 1, j - 1, k + 1, em::bx1)); - c010 = INV_4 * (EB(i, j, k, em::bx1) + EB(i, j + 1, k, em::bx1) + - EB(i, j, k - 1, em::bx1) + EB(i, j + 1, k - 1, em::bx1)); - c110 = INV_4 * - (EB(i + 1, j, k, em::bx1) + EB(i + 1, j, k - 1, em::bx1) + - EB(i + 1, j + 1, k - 1, em::bx1) + EB(i + 1, j + 1, k, em::bx1)); - c011 = INV_4 * (EB(i, j, k, em::bx1) + EB(i, j + 1, k, em::bx1) + - EB(i, j + 1, k + 1, em::bx1) + EB(i, j, k + 1, em::bx1)); - c111 = INV_4 * - (EB(i + 1, j, k, em::bx1) + EB(i + 1, j + 1, k, em::bx1) + - EB(i + 1, j + 1, k + 1, em::bx1) + EB(i + 1, j, k + 1, em::bx1)); - c00 
= c000 * (ONE - dx1_) + c100 * dx1_; - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - b0[0] = c0 * (ONE - dx3_) + c1 * dx3_; - + // Interpolate -- (primal, dual, dual) + c000 = EB(i, j - 1 + indy, k - 1 + indz, em::bx1); + c100 = EB(i + 1, j - 1 + indy, k - 1 + indz, em::bx1); + c010 = EB(i, j + indy, k - 1 + indz, em::bx1); + c110 = EB(i + 1, j + indy, k - 1 + indz, em::bx1); + c001 = EB(i, j - 1 + indy, k + indz, em::bx1); + c101 = EB(i + 1, j - 1 + indy, k + indz, em::bx1); + c011 = EB(i, j + indy, k + indz, em::bx1); + c111 = EB(i + 1, j + indy, k + indz, em::bx1); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + c0 = c00 * pondmy + c10 * pondpy; + c01 = c001 * ponpmx + c101 * ponppx; + c11 = c011 * ponpmx + c111 * ponppx; + c1 = c01 * pondmy + c11 * pondpy; + b0[0] = c0 * pondmz + c1 * pondpz; // Bx2 - c000 = INV_4 * (EB(i - 1, j, k - 1, em::bx2) + EB(i - 1, j, k, em::bx2) + - EB(i, j, k - 1, em::bx2) + EB(i, j, k, em::bx2)); - c100 = INV_4 * (EB(i, j, k - 1, em::bx2) + EB(i, j, k, em::bx2) + - EB(i + 1, j, k - 1, em::bx2) + EB(i + 1, j, k, em::bx2)); - c001 = INV_4 * (EB(i - 1, j, k, em::bx2) + EB(i - 1, j, k + 1, em::bx2) + - EB(i, j, k, em::bx2) + EB(i, j, k + 1, em::bx2)); - c101 = INV_4 * (EB(i, j, k, em::bx2) + EB(i, j, k + 1, em::bx2) + - EB(i + 1, j, k, em::bx2) + EB(i + 1, j, k + 1, em::bx2)); - c010 = INV_4 * - (EB(i - 1, j + 1, k - 1, em::bx2) + EB(i - 1, j + 1, k, em::bx2) + - EB(i, j + 1, k - 1, em::bx2) + EB(i, j + 1, k, em::bx2)); - c110 = INV_4 * - (EB(i, j + 1, k - 1, em::bx2) + EB(i, j + 1, k, em::bx2) + - EB(i + 1, j + 1, k - 1, em::bx2) + EB(i + 1, j + 1, k, em::bx2)); - c011 = INV_4 * - (EB(i - 1, j + 1, k, em::bx2) + EB(i - 1, j + 1, k + 1, em::bx2) + - EB(i, j + 1, k, em::bx2) + EB(i, j + 1, k + 1, em::bx2)); - c111 = INV_4 * - (EB(i, j + 1, k, 
em::bx2) + EB(i, j + 1, k + 1, em::bx2) + - EB(i + 1, j + 1, k, em::bx2) + EB(i + 1, j + 1, k + 1, em::bx2)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - b0[1] = c0 * (ONE - dx3_) + c1 * dx3_; - + // Interpolate -- (dual, primal, dual) + c000 = EB(i - 1 + indx, j, k - 1 + indz, em::bx2); + c100 = EB(i + indx, j, k - 1 + indz, em::bx2); + c010 = EB(i - 1 + indx, j + 1, k - 1 + indz, em::bx2); + c110 = EB(i + indx, j + 1, k - 1 + indz, em::bx2); + c001 = EB(i - 1 + indx, j, k + indz, em::bx2); + c101 = EB(i + indx, j, k + indz, em::bx2); + c011 = EB(i - 1 + indx, j + 1, k + indz, em::bx2); + c111 = EB(i + indx, j + 1, k + indz, em::bx2); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + c0 = c00 * ponpmy + c10 * ponppy; + c01 = c001 * pondmx + c101 * pondpx; + c11 = c011 * pondmx + c111 * pondpx; + c1 = c01 * ponpmy + c11 * ponppy; + b0[1] = c0 * pondmz + c1 * pondpz; // Bx3 - c000 = INV_4 * (EB(i - 1, j - 1, k, em::bx3) + EB(i - 1, j, k, em::bx3) + - EB(i, j - 1, k, em::bx3) + EB(i, j, k, em::bx3)); - c100 = INV_4 * (EB(i, j - 1, k, em::bx3) + EB(i, j, k, em::bx3) + - EB(i + 1, j - 1, k, em::bx3) + EB(i + 1, j, k, em::bx3)); - c001 = INV_4 * - (EB(i - 1, j - 1, k + 1, em::bx3) + EB(i - 1, j, k + 1, em::bx3) + - EB(i, j - 1, k + 1, em::bx3) + EB(i, j, k + 1, em::bx3)); - c101 = INV_4 * - (EB(i, j - 1, k + 1, em::bx3) + EB(i, j, k + 1, em::bx3) + - EB(i + 1, j - 1, k + 1, em::bx3) + EB(i + 1, j, k + 1, em::bx3)); - c010 = INV_4 * (EB(i - 1, j, k, em::bx3) + EB(i - 1, j + 1, k, em::bx3) + - EB(i, j, k, em::bx3) + EB(i, j + 1, k, em::bx3)); - c110 = INV_4 * (EB(i, j, k, em::bx3) + EB(i, j + 1, k, em::bx3) + - EB(i + 1, j, k, em::bx3) + EB(i + 1, j + 1, k, em::bx3)); - c011 = INV_4 * - (EB(i - 1, j, k + 1, em::bx3) + EB(i - 1, j + 1, k 
+ 1, em::bx3) + - EB(i, j, k + 1, em::bx3) + EB(i, j + 1, k + 1, em::bx3)); - c111 = INV_4 * - (EB(i, j, k + 1, em::bx3) + EB(i, j + 1, k + 1, em::bx3) + - EB(i + 1, j, k + 1, em::bx3) + EB(i + 1, j + 1, k + 1, em::bx3)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - b0[2] = c0 * (ONE - dx3_) + c1 * dx3_; + // Interpolate -- (dual, dual, primal) + c000 = EB(i - 1 + indx, j - 1 + indy, k, em::bx3); + c100 = EB(i + indx, j - 1 + indy, k, em::bx3); + c010 = EB(i - 1 + indx, j + indy, k, em::bx3); + c110 = EB(i + indx, j + indy, k, em::bx3); + c001 = EB(i - 1 + indx, j - 1 + indy, k + 1, em::bx3); + c101 = EB(i + indx, j - 1 + indy, k + 1, em::bx3); + c011 = EB(i - 1 + indx, j + indy, k + 1, em::bx3); + c111 = EB(i + indx, j + indy, k + 1, em::bx3); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + c0 = c00 * ponpmy + c10 * ponppy; + c01 = c001 * pondmx + c101 * pondpx; + c11 = c011 * pondmx + c111 * pondpx; + c1 = c01 * ponpmy + c11 * ponppy; + b0[2] = c0 * ponpmz + c1 * ponppz; } } From 3d9ac42874f1a95b79cbace279eb5f0f5fb841fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 3 May 2025 16:22:04 -0500 Subject: [PATCH 004/154] Esirkepov Eq. 
24, 31, 38 (wip) --- src/kernels/currents_deposit.hpp | 691 +++++++++++++++++++++---------- 1 file changed, 470 insertions(+), 221 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 98d00a9b0..3e97f40b2 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -42,6 +42,75 @@ namespace kernel { const array_t tag; const M metric; const real_t charge, inv_dt; + const int interpolation_order; + + private: + Inline void find_indices_and_S(const int i_pos, + array_t& indices, + array_t& S) { + + // find contributing indices + // ToDo: check if this is correct + const auto i_min = floor( + i_pos - (static_cast(interpolation_order) - ONE) * HALF); + + for (int i = 0; i <= interpolation_order; i++) { + indices[i] = i_min + i; + } + + if constexpr (interpolation_order == 1) { + const auto dx = static_cast(x - indices[0]); + S[0] = ONE - dx; + S[1] = dx; + } else if constexpr (interpolation_order == 2) { + // Esirkepov 2001, Eq. 
24 + const auto dx = static_cast(indices[1] - x); + S[0] = HALF * SQR(HALF + dx); + S[1] = static_cast(0.75) - SQR(dx); + S[2] = HALF * SQR(HALF - dx); + } else { + // throw error + } + } + + Inline void apply_shape_function(array_t& S0, + array_t& S1, + array_t& PS0, + array_t& PS1, + array_t& IS0, + array_t& IS1, + int* i_min, + int* i_max) { + + // check displacement + const auto shift_I = IS0[0] - IS1[0]; + + if (shift_I > 0) { + // positive shift in x1 direction + for (int i = 0; i <= interpolation_order; i++) { + S0[i] = PS0[i]; + S1[i + 1] = PS1[i]; + } + i_min = IS0[0]; + i_max = IS1[interpolation_order]; + } else if (shift_I < 0) { + // negative shift in x1 direction + for (int i = 0; i <= interpolation_order; i++) { + S0[i + 1] = PS0[i]; + S1[i] = PS1[i]; + } + i_min = IS1[0]; + i_max = IS0[interpolation_order]; + } else { + // no shift + for (int i = 0; i <= interpolation_order; i++) { + S0[i] = PS0[i]; + S1[i] = PS1[i]; + } + i_min = IS1[0]; + i_max = IS1[interpolation_order]; + } + } public: /** @@ -68,7 +137,8 @@ namespace kernel { const array_t& tag, const M& metric, real_t charge, - real_t dt) + const real_t dt, + int interpolation_order) : J { scatter_cur } , i1 { i1 } , i2 { i2 } @@ -90,7 +160,8 @@ namespace kernel { , tag { tag } , metric { metric } , charge { charge } - , inv_dt { ONE / dt } {} + , inv_dt { ONE / dt } + , interpolation_order { interpolation_order } {} /** * @brief Iteration of the loop over particles. 
@@ -143,241 +214,419 @@ namespace kernel { const real_t coeff { weight(p) * charge }; - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * - (dx1(p) + dx1_prev(p)) * static_cast(INV_2) }; - - const real_t Wx1_1 { INV_2 * (dxp_r_1 + dx1_prev(p) + - static_cast(i1(p) > i1_prev(p))) }; - const real_t Wx1_2 { INV_2 * (dx1(p) + dxp_r_1 + - static_cast( - static_cast(i1(p) > i1_prev(p)) + - i1_prev(p) - i1(p))) }; - const real_t Fx1_1 { (static_cast(i1(p) > i1_prev(p)) + dxp_r_1 - - dx1_prev(p)) * - coeff * inv_dt }; - const real_t Fx1_2 { (static_cast( - i1(p) - i1_prev(p) - - static_cast(i1(p) > i1_prev(p))) + - dx1(p) - dxp_r_1) * - coeff * inv_dt }; - - auto J_acc = J.access(); - - // tuple_t dxp_r; - if constexpr (D == Dim::_1D) { - const real_t Fx2_1 { HALF * vp[1] * coeff }; - const real_t Fx2_2 { HALF * vp[1] * coeff }; - - const real_t Fx3_1 { HALF * vp[2] * coeff }; - const real_t Fx3_2 { HALF * vp[2] * coeff }; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx1) += Fx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx1) += Fx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx2) += Fx2_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx2) += Fx2_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx2) += Fx2_2 * (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, cur::jx2) += Fx2_2 * Wx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * Wx1_2; - } else if constexpr (D == Dim::_2D || D == Dim::_3D) { - const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * - (dx2(p) + dx2_prev(p)) * + // ToDo: interpolation_order as parameter + if constexpr (interpolation_order == 0) { + /* + Zig-zag deposit + */ + + const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * + (dx1(p) + dx1_prev(p)) * static_cast(INV_2) }; - const real_t Wx2_1 { INV_2 * (dxp_r_2 + dx2_prev(p) + - static_cast(i2(p) > i2_prev(p))) }; 
- const real_t Wx2_2 { INV_2 * (dx2(p) + dxp_r_2 + + const real_t Wx1_1 { INV_2 * (dxp_r_1 + dx1_prev(p) + + static_cast(i1(p) > i1_prev(p))) }; + const real_t Wx1_2 { INV_2 * (dx1(p) + dxp_r_1 + static_cast( - static_cast(i2(p) > i2_prev(p)) + - i2_prev(p) - i2(p))) }; - const real_t Fx2_1 { (static_cast(i2(p) > i2_prev(p)) + - dxp_r_2 - dx2_prev(p)) * + static_cast(i1(p) > i1_prev(p)) + + i1_prev(p) - i1(p))) }; + const real_t Fx1_1 { (static_cast(i1(p) > i1_prev(p)) + + dxp_r_1 - dx1_prev(p)) * coeff * inv_dt }; - const real_t Fx2_2 { (static_cast( - i2(p) - i2_prev(p) - - static_cast(i2(p) > i2_prev(p))) + - dx2(p) - dxp_r_2) * + const real_t Fx1_2 { (static_cast( + i1(p) - i1_prev(p) - + static_cast(i1(p) > i1_prev(p))) + + dx1(p) - dxp_r_1) * coeff * inv_dt }; - if constexpr (D == Dim::_2D) { + auto J_acc = J.access(); + + // tuple_t dxp_r; + if constexpr (D == Dim::_1D) { + const real_t Fx2_1 { HALF * vp[1] * coeff }; + const real_t Fx2_2 { HALF * vp[1] * coeff }; + const real_t Fx3_1 { HALF * vp[2] * coeff }; const real_t Fx3_2 { HALF * vp[2] * coeff }; - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * Wx2_1; - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx1) += Fx1_2 * - (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS + 1, cur::jx1) += Fx1_2 * Wx2_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * - (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * Wx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_2 * (ONE - Wx2_1); - 
J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; - - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx3) += Fx3_2 * - (ONE - Wx1_2) * - (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; - J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * - Wx1_2 * - Wx2_2; - } else { - const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * - (dx3(p) + dx3_prev(p)) * + J_acc(i1_prev(p) + N_GHOSTS, cur::jx1) += Fx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx1) += Fx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, cur::jx2) += Fx2_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx2) += Fx2_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx2) += Fx2_2 * (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, cur::jx2) += Fx2_2 * Wx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * Wx1_2; + } else if constexpr (D == Dim::_2D || D == Dim::_3D) { + const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * + (dx2(p) + dx2_prev(p)) * static_cast(INV_2) }; - const real_t Wx3_1 { INV_2 * (dxp_r_3 + dx3_prev(p) + - static_cast(i3(p) > i3_prev(p))) }; - const real_t Wx3_2 { INV_2 * (dx3(p) + dxp_r_3 + + + const real_t Wx2_1 { INV_2 * (dxp_r_2 + dx2_prev(p) + + static_cast(i2(p) > i2_prev(p))) }; + const real_t Wx2_2 { INV_2 * (dx2(p) + dxp_r_2 + static_cast( - static_cast(i3(p) > i3_prev(p)) + - i3_prev(p) - i3(p))) }; - const real_t Fx3_1 { (static_cast(i3(p) > i3_prev(p)) + - dxp_r_3 - dx3_prev(p)) * + static_cast(i2(p) > i2_prev(p)) + + i2_prev(p) - i2(p))) }; + const real_t Fx2_1 { 
(static_cast(i2(p) > i2_prev(p)) + + dxp_r_2 - dx2_prev(p)) * coeff * inv_dt }; - const real_t Fx3_2 { (static_cast( - i3(p) - i3_prev(p) - - static_cast(i3(p) > i3_prev(p))) + - dx3(p) - dxp_r_3) * + const real_t Fx2_2 { (static_cast( + i2(p) - i2_prev(p) - + static_cast(i2(p) > i2_prev(p))) + + dx2(p) - dxp_r_2) * coeff * inv_dt }; - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * (ONE - Wx2_1) * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * Wx2_1 * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * (ONE - Wx2_1) * Wx3_1; - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * Wx2_1 * Wx3_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx1) += Fx1_2 * (ONE - Wx2_2) * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx1) += Fx1_2 * Wx2_2 * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_2 * (ONE - Wx2_2) * Wx3_2; - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_2 * Wx2_2 * Wx3_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * (ONE - Wx1_1) * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * Wx1_1 * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_1 * (ONE - Wx1_1) * Wx3_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_1 * Wx1_1 * Wx3_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx2) += Fx2_2 * (ONE - Wx1_2) * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS 
+ 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx2) += Fx2_2 * Wx1_2 * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_2 * (ONE - Wx1_2) * Wx3_2; - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_2 * Wx1_2 * Wx3_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_1 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; + if constexpr (D == Dim::_2D) { + const real_t Fx3_1 { HALF * vp[2] * coeff }; + const real_t Fx3_2 { HALF * vp[2] * coeff }; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * Wx2_1; + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx1) += Fx1_2 * + (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS + 1, cur::jx1) += Fx1_2 * Wx2_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, i2(p) 
+ N_GHOSTS, cur::jx2) += Fx2_2 * + (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * Wx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_2 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; + } else { + const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * + (dx3(p) + dx3_prev(p)) * + static_cast(INV_2) }; + const real_t Wx3_1 { INV_2 * (dxp_r_3 + dx3_prev(p) + + static_cast(i3(p) > i3_prev(p))) }; + const real_t Wx3_2 { INV_2 * (dx3(p) + dxp_r_3 + + static_cast( + static_cast(i3(p) > i3_prev(p)) + + i3_prev(p) - i3(p))) }; + const real_t Fx3_1 { (static_cast(i3(p) > i3_prev(p)) + + dxp_r_3 - dx3_prev(p)) * + coeff * inv_dt }; + const real_t Fx3_2 { (static_cast( + i3(p) - i3_prev(p) - + static_cast(i3(p) > i3_prev(p))) + + dx3(p) - dxp_r_3) * + coeff * inv_dt }; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * (ONE - Wx2_1) * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * Wx2_1 * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * (ONE - Wx2_1) * Wx3_1; + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + 
N_GHOSTS + 1, + cur::jx1) += Fx1_1 * Wx2_1 * Wx3_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx1) += Fx1_2 * (ONE - Wx2_2) * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx1) += Fx1_2 * Wx2_2 * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_2 * (ONE - Wx2_2) * Wx3_2; + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_2 * Wx2_2 * Wx3_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * (ONE - Wx1_1) * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * Wx1_1 * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_1 * (ONE - Wx1_1) * Wx3_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_1 * Wx1_1 * Wx3_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx2) += Fx2_2 * (ONE - Wx1_2) * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx2) += Fx2_2 * Wx1_2 * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_2 * (ONE - Wx1_2) * Wx3_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_2 * Wx1_2 * Wx3_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_1 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + 
cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; + } } - } - } - }; + } else { + /* + Higher order charge conserving current deposition based on + Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract + + We need to define the follwowing arrays: + - Shape functions in spatial directions for the particle position + before and after the current timestep. + S0x, S1x, S0y, S1y, S0z, S1z + - Indices this shape function contributes to + IS0, IS1 + - Value of the shape function at the cell positions + PS0, PS1 + - Density composition matrix + Wx, Wy, Wz + */ + + // shape function arrays at time 0 and 1 + vec_t PS0 { ZERO }; + vec_t PS1 { ZERO }; + // indices the shape function contributes to + vec_t IS0 { ZERO }; // ToDo: integer + vec_t IS1 { ZERO }; // ToDo: integer + + // minimum and maximum contributing indices + vec_t i_min { ZERO }; // ToDo: integer + vec_t i_max { ZERO }; // ToDo: integer + + if constexpr (D == Dim::_1D) { + // throw error + } else if constexpr (D == Dim::_2D) { + + // ToDo: check if this is what I need + const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * + (dx1(p) + dx1_prev(p)) * + static_cast(INV_2) }; + + const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * + (dx2(p) + dx2_prev(p)) * + static_cast(INV_2) }; + + // define weight functions + vec_t Wx { ZERO }; + vec_t Wy { ZERO }; + vec_t Wz { ZERO }; + + /* + x - direction + */ + // shape function in x direction + vec_t S0x { ZERO }; + vec_t S1x { ZERO }; + + // find indices and define shape function + 
find_indices_and_PS(i1(p), IS0, PS0); + find_indices_and_PS(i1_prev(p), IS1, PS1); + + // apply shape function + apply_shape_function(S0x, S1x, PS0, PS1, IS0, IS1, &i_min[0], &i_max[0]); + + /* + y - direction + */ + // shape function in x direction + vec_t S0y { ZERO }; + vec_t S1y { ZERO }; + + // find indices and define shape function + find_indices_and_PS(i2(p), IS0, PS0); + find_indices_and_PS(i2_prev(p), IS1, PS1); + + // apply shape function + apply_shape_function(S0y, S1y, PS0, PS1, IS0, IS1, &i_min[1], &i_max[1]); + + // Calculate weight function + for (int i = 0; i < interp_order + 2; ++i) { + for (int j = 0; j < interp_order + 2; ++j) { + // Esirkepov 2001, Eq. 38 + Wx[i][j] = HALF * (S1x[i] - S0x[i]) * (S0y[j] + S1y[j]); + Wy[i][j] = HALF * (S1x[i] + S0x[i]) * (S0y[j] - S1y[j]); + Wz[i][j] = THIRD * (S1y[j] * (HALF * S0x[i] + S1x[i]) + + S0y[j] * (HALF * S1x[i] + S0x[i])); + } + } + // ToDo: actual J update + + } else if constexpr (D == Dim::_3D) { + + const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * + (dx1(p) + dx1_prev(p)) * + static_cast(INV_2) }; + + const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * + (dx2(p) + dx2_prev(p)) * + static_cast(INV_2) }; + + const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * + (dx3(p) + dx3_prev(p)) * + static_cast(INV_2) }; + + // define weight functions + vec_t + Wx { ZERO }; + vec_t + Wy { ZERO }; + vec_t + Wz { ZERO }; + + /* + x - direction + */ + // shape function in x direction + vec_t S0x { ZERO }; + vec_t S1x { ZERO }; + + // find indices and define shape function + find_indices_and_PS(i1(p), IS0, PS0); + find_indices_and_PS(i1_prev(p), IS1, PS1); + + // apply shape function + apply_shape_function(S0x, S1x, PS0, PS1, IS0, IS1, &i_min[0], &i_max[0]); + + /* + y - direction + */ + // shape function in y direction + vec_t S0y { ZERO }; + vec_t S1y { ZERO }; + + // find indices and define shape function + find_indices_and_PS(i2(p), IS0, PS0); + find_indices_and_PS(i2_prev(p), IS1, PS1); + + // 
apply shape function + apply_shape_function(S0y, S1y, PS0, PS1, IS0, IS1, &i_min[1], &i_max[1]); + + /* + z - direction + */ + // shape function in z direction + vec_t S0z { ZERO }; + vec_t S1z { ZERO }; + + // find indices and define shape function + find_indices_and_PS(i3(p), IS0, PS0); + find_indices_and_PS(i3_prev(p), IS1, PS1); + + // apply shape function + apply_shape_function(S0z, S1z, PS0, PS1, IS0, IS1, &i_min[2], &i_max[2]); + + // Calculate weight function + for (int i = 0; i < interp_order + 2; ++i) { + for (int j = 0; j < interp_order + 2; ++j) { + for (int k = 0; k < interp_order + 2; ++k) { + // Esirkepov 2001, Eq. 31 + Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * + ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + + HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); + + Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * + ( S0x[i] * S0z[k] + S1x[i] * S1z[k] + + HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); + + Wz[i][j][k] = THIRD * (S1z[k] - S0[k]) * + (S0x[i] * S0y[j] + S1x[i] * S1y[j] + + HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + } + } + } + + // ToDo: actual J update + } + }; -} // namespace kernel + } // namespace kernel #undef i_di_to_Xi From c984fe03cd36646eaaba0bacce970fff76cb4568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 3 May 2025 16:31:11 -0500 Subject: [PATCH 005/154] added THIRD --- src/global/utils/numeric.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/global/utils/numeric.h b/src/global/utils/numeric.h index cc1191b62..a2da7727c 100644 --- a/src/global/utils/numeric.h +++ b/src/global/utils/numeric.h @@ -39,6 +39,7 @@ inline constexpr float FIVE = 5.0f; inline constexpr float TWELVE = 12.0f; inline constexpr float ZERO = 0.0f; inline constexpr float HALF = 0.5f; +inline constexpr float THIRD = 0.333333f; inline constexpr float INV_2 = 0.5f; inline constexpr float INV_4 = 0.25f; inline constexpr float INV_8 = 0.125f; @@ -54,6 +55,7 @@ inline constexpr double FIVE = 5.0; inline constexpr double TWELVE = 12.0; inline 
constexpr double ZERO = 0.0; inline constexpr double HALF = 0.5; +inline constexpr double THIRD = 0.3333333333333333; inline constexpr double INV_2 = 0.5; inline constexpr double INV_4 = 0.25; inline constexpr double INV_8 = 0.125; From 1be51852b9d8f01c1bb43db666bdef1759a9a5e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 3 May 2025 16:35:47 -0500 Subject: [PATCH 006/154] bugfix --- src/kernels/currents_deposit.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 3e97f40b2..7f9136fe9 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -615,7 +615,7 @@ namespace kernel { ( S0x[i] * S0z[k] + S1x[i] * S1z[k] + HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); - Wz[i][j][k] = THIRD * (S1z[k] - S0[k]) * + Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * (S0x[i] * S0y[j] + S1x[i] * S1y[j] + HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); } From 180358ef5389e7a345d996d5ea88d5aab5d7cffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 3 May 2025 16:36:00 -0500 Subject: [PATCH 007/154] formatting --- src/global/utils/numeric.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/global/utils/numeric.h b/src/global/utils/numeric.h index a2da7727c..9ff262ed8 100644 --- a/src/global/utils/numeric.h +++ b/src/global/utils/numeric.h @@ -39,7 +39,7 @@ inline constexpr float FIVE = 5.0f; inline constexpr float TWELVE = 12.0f; inline constexpr float ZERO = 0.0f; inline constexpr float HALF = 0.5f; -inline constexpr float THIRD = 0.333333f; +inline constexpr float THIRD = 0.333333f; inline constexpr float INV_2 = 0.5f; inline constexpr float INV_4 = 0.25f; inline constexpr float INV_8 = 0.125f; From b81f433c045aaf8af31f6d867d8b72a85224a194 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 10 May 2025 19:43:06 -0500 Subject: [PATCH 008/154] redefine vectors to variables + explicit 
loop unrolling for 2D --- src/kernels/currents_deposit.hpp | 657 ++++++++++++++++++++++--------- 1 file changed, 468 insertions(+), 189 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 7f9136fe9..841392f55 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -45,70 +45,101 @@ namespace kernel { const int interpolation_order; private: - Inline void find_indices_and_S(const int i_pos, - array_t& indices, - array_t& S) { - // find contributing indices - // ToDo: check if this is correct - const auto i_min = floor( - i_pos - (static_cast(interpolation_order) - ONE) * HALF); - - for (int i = 0; i <= interpolation_order; i++) { - indices[i] = i_min + i; - } - - if constexpr (interpolation_order == 1) { - const auto dx = static_cast(x - indices[0]); - S[0] = ONE - dx; - S[1] = dx; - } else if constexpr (interpolation_order == 2) { - // Esirkepov 2001, Eq. 24 - const auto dx = static_cast(indices[1] - x); - S[0] = HALF * SQR(HALF + dx); - S[1] = static_cast(0.75) - SQR(dx); - S[2] = HALF * SQR(HALF - dx); - } else { - // throw error - } - } - - Inline void apply_shape_function(array_t& S0, - array_t& S1, - array_t& PS0, - array_t& PS1, - array_t& IS0, - array_t& IS1, - int* i_min, - int* i_max) { - - // check displacement - const auto shift_I = IS0[0] - IS1[0]; - - if (shift_I > 0) { - // positive shift in x1 direction - for (int i = 0; i <= interpolation_order; i++) { - S0[i] = PS0[i]; - S1[i + 1] = PS1[i]; - } - i_min = IS0[0]; - i_max = IS1[interpolation_order]; - } else if (shift_I < 0) { - // negative shift in x1 direction - for (int i = 0; i <= interpolation_order; i++) { - S0[i + 1] = PS0[i]; - S1[i] = PS1[i]; - } - i_min = IS1[0]; - i_max = IS0[interpolation_order]; + Inline void shape_function(real_t* S0_0, + real_t* S0_1, + real_t* S0_2, + real_t* S0_3, + real_t* S1_0, + real_t* S1_1, + real_t* S1_2, + real_t* S1_3, + int* i_min, + int* const i_max int_t i, + const real_t dx, + 
const int_t i_prev, + const real_t dx_prev) { + + /* + Shape function per particle is a 4 element array. + We need to find which indices are contributing to the shape function + For this we first compute the indices of the particle position + + Let x be the particle position at the current timestep + Let * be the particle position at the previous timestep + + + (-1) 0 1 2 3 + ___________________________________ + | | x* | x* | x* | | // shift_i = 0 + |______|______|______|______|______| + | | x | x* | x* | * | // shift_i = 1 + |______|______|______|______|______| + | * | x* | x* | x | | // shift_i = -1 + |______|______|______|______|______| + */ + + // find shift in indices + const auto shift_x { i_prev - i - (dx_prev - dx) }; + + // find indices and define shape function + if (shift_x > 0) { + /* + (-1) 0 1 2 3 + ___________________________________ + | | x | x* | x* | * | // shift_i = 1 + |______|______|______|______|______| + */ + ix_min = i_prev - 2; + ix_max = i + 2; + // shape function, ToDo: fix + S0_0 = HALF * SQR(HALF + dx_prev); + S0_1 = static_cast(0.75) - SQR(dx_prev); + S0_2 = HALF * SQR(HALF - dx_prev); + S0_3 = ZERO; + + S1_0 = ZERO; + S1_1 = HALF * SQR(HALF + dx); + S1_2 = static_cast(0.75) - SQR(dx); + S1_3 = HALF * SQR(HALF - dx); + } else if (shift_x < 0) { + /* + (-1) 0 1 2 3 + ___________________________________ + | * | x* | x* | x | | // shift_i = -1 + |______|______|______|______|______| + */ + ix_min = i - 2; + ix_max = i_prev + 2; + // shape function, ToDo: fix + S0_0 = ZERO; + S0_1 = HALF * SQR(HALF + dx_prev); + S0_2 = static_cast(0.75) - SQR(dx_prev); + S0_3 = HALF * SQR(HALF - dx_prev); + + S1_0 = HALF * SQR(HALF + dx); + S1_1 = static_cast(0.75) - SQR(dx); + S1_2 = HALF * SQR(HALF - dx); + S1_3 = ZERO; } else { - // no shift - for (int i = 0; i <= interpolation_order; i++) { - S0[i] = PS0[i]; - S1[i] = PS1[i]; - } - i_min = IS1[0]; - i_max = IS1[interpolation_order]; + /* + (-1) 0 1 2 3 + ___________________________________ + | | x* | 
x* | x* | | // shift_i = 0 + |______|______|______|______|______| + */ + ix_min = i - 2; + ix_max = i + 2; + // shape function, ToDo: fix + S0_0 = HALF * SQR(HALF + dx_prev); + S0_1 = static_cast(0.75) - SQR(dx_prev); + S0_2 = HALF * SQR(HALF - dx_prev); + S0_3 = ZERO; + + S1_0 = HALF * SQR(HALF + dx); + S1_1 = static_cast(0.75) - SQR(dx); + S1_2 = HALF * SQR(HALF - dx); + S1_3 = ZERO; } } @@ -457,89 +488,134 @@ namespace kernel { Higher order charge conserving current deposition based on Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract - We need to define the follwowing arrays: + We need to define the follwowing variable: - Shape functions in spatial directions for the particle position before and after the current timestep. - S0x, S1x, S0y, S1y, S0z, S1z - - Indices this shape function contributes to - IS0, IS1 - - Value of the shape function at the cell positions - PS0, PS1 + S0_*, S1_* - Density composition matrix - Wx, Wy, Wz + Wx_*, Wy_*, Wz_* */ - // shape function arrays at time 0 and 1 - vec_t PS0 { ZERO }; - vec_t PS1 { ZERO }; - // indices the shape function contributes to - vec_t IS0 { ZERO }; // ToDo: integer - vec_t IS1 { ZERO }; // ToDo: integer - - // minimum and maximum contributing indices - vec_t i_min { ZERO }; // ToDo: integer - vec_t i_max { ZERO }; // ToDo: integer - - if constexpr (D == Dim::_1D) { - // throw error - } else if constexpr (D == Dim::_2D) { - - // ToDo: check if this is what I need - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * - (dx1(p) + dx1_prev(p)) * - static_cast(INV_2) }; - - const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * - (dx2(p) + dx2_prev(p)) * - static_cast(INV_2) }; - - // define weight functions - vec_t Wx { ZERO }; - vec_t Wy { ZERO }; - vec_t Wz { ZERO }; - - /* + /* x - direction - */ - // shape function in x direction - vec_t S0x { ZERO }; - vec_t S1x { ZERO }; + */ - // find indices and define shape function - find_indices_and_PS(i1(p), IS0, PS0); - 
find_indices_and_PS(i1_prev(p), IS1, PS1); + // shape function at previous timestep + real_t S0x_0, S0x_1, S0x_2, S0x_3; + // shape function at current timestep + real_t S1x_0, S1x_1, S1x_2, S1x_3; + // indices of the shape function + uint ix_min, ix_max; + // find indices and define shape function + shape_function(&Sx0_0, &Sx0_1, &Sx0_2, &Sx0_3, + &Sx1_0, &Sx1_1, &Sx1_2, &Sx1_3, + &ix_min, &ix_max, + i1(p), dx1(p), + i1_prev(p), dx1_prev(p)); - // apply shape function - apply_shape_function(S0x, S1x, PS0, PS1, IS0, IS1, &i_min[0], &i_max[0]); + if constexpr (D == Dim::_1D) { + // ToDo + } + else if constexpr (D == Dim::_2D) { /* y - direction */ - // shape function in x direction - vec_t S0y { ZERO }; - vec_t S1y { ZERO }; + // shape function at previous timestep + real_t S0y_0, S0y_1, S0y_2, S0y_3; + // shape function at current timestep + real_t S1y_0, S1y_1, S1y_2, S1y_3; + // indices of the shape function + uint iy_min, iy_max; // find indices and define shape function - find_indices_and_PS(i2(p), IS0, PS0); - find_indices_and_PS(i2_prev(p), IS1, PS1); - - // apply shape function - apply_shape_function(S0y, S1y, PS0, PS1, IS0, IS1, &i_min[1], &i_max[1]); + shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, + &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, + &iy_min, &iy_max, + i2(p), dx2(p), + i2_prev(p), dx2_prev(p)); // Calculate weight function - for (int i = 0; i < interp_order + 2; ++i) { - for (int j = 0; j < interp_order + 2; ++j) { - // Esirkepov 2001, Eq. 
38 - Wx[i][j] = HALF * (S1x[i] - S0x[i]) * (S0y[j] + S1y[j]); - Wy[i][j] = HALF * (S1x[i] + S0x[i]) * (S0y[j] - S1y[j]); - Wz[i][j] = THIRD * (S1y[j] * (HALF * S0x[i] + S1x[i]) + - S0y[j] * (HALF * S1x[i] + S0x[i])); - } - } - // ToDo: actual J update - - } else if constexpr (D == Dim::_3D) { + // Unrolled calculations for Wx + const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); + const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); + const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); + const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); + + const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); + const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); + const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); + const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); + + const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); + const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); + const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); + const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); + + const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); + const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); + const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); + const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); + + // Unrolled calculations for Wy + const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S0y_0 - S1y_0); + const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S0y_1 - S1y_1); + const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S0y_2 - S1y_2); + const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S0y_3 - S1y_3); + + const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S0y_0 - S1y_0); + const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S0y_1 - S1y_1); + const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S0y_2 - S1y_2); + const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S0y_3 - S1y_3); + + const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S0y_0 - S1y_0); + const auto Wy_2_1 = 
HALF * (S1x_2 + S0x_2) * (S0y_1 - S1y_1); + const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S0y_2 - S1y_2); + const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S0y_3 - S1y_3); + + const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S0y_0 - S1y_0); + const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S0y_1 - S1y_1); + const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); + const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S0y_3 - S1y_3); + + // Unrolled calculations for Wz + const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + + S0y_0 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + + S0y_1 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + + S0y_2 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + + S0y_3 * (HALF * S1x_0 + S0x_0)); + + const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + + S0y_0 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_1 = THIRD * (S1y_1 * (HALF * S0x_1 + S1x_1) + + S0y_1 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + + S0y_2 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + + S0y_3 * (HALF * S1x_1 + S0x_1)); + + const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + + S0y_0 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + + S0y_1 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + + S0y_2 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + + S0y_3 * (HALF * S1x_2 + S0x_2)); + + const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + + S0y_0 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_1 = THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + + S0y_1 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + + S0y_2 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_3 = THIRD * (S1y_3 * (HALF 
* S0x_3 + S1x_3) + + S0y_3 * (HALF * S1x_3 + S0x_3)); + // ToDo: check if this is what I need const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * (dx1(p) + dx1_prev(p)) * static_cast(INV_2) }; @@ -548,79 +624,282 @@ namespace kernel { (dx2(p) + dx2_prev(p)) * static_cast(INV_2) }; - const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * - (dx3(p) + dx3_prev(p)) * - static_cast(INV_2) }; - - // define weight functions - vec_t - Wx { ZERO }; - vec_t - Wy { ZERO }; - vec_t - Wz { ZERO }; - - /* - x - direction - */ - // shape function in x direction - vec_t S0x { ZERO }; - vec_t S1x { ZERO }; - - // find indices and define shape function - find_indices_and_PS(i1(p), IS0, PS0); - find_indices_and_PS(i1_prev(p), IS1, PS1); - - // apply shape function - apply_shape_function(S0x, S1x, PS0, PS1, IS0, IS1, &i_min[0], &i_max[0]); + // ToDo: actual J update + auto J_acc = J.access(); + // Calculate weight function + for (int i = 0; i < interp_order + 2; ++i) { + for (int j = 0; j < interp_order + 2; ++j) { + // Esirkepov 2001, Eq. 
39 + J_acc(N_GHOSTS + i_min[0] + i, + N_GHOSTS + i_min[1] + j, + cur::jx1) += coeff * inv_dt * Wx[i][j] * dxp_r_1; + } + } + } + else if constexpr (D == Dim::_3D) { /* y - direction */ - // shape function in y direction - vec_t S0y { ZERO }; - vec_t S1y { ZERO }; + // shape function at previous timestep + real_t S0y_0, S0y_1, S0y_2, S0y_3; + // shape function at current timestep + real_t S1y_0, S1y_1, S1y_2, S1y_3; + // indices of the shape function + uint iy_min, iy_max; // find indices and define shape function - find_indices_and_PS(i2(p), IS0, PS0); - find_indices_and_PS(i2_prev(p), IS1, PS1); - - // apply shape function - apply_shape_function(S0y, S1y, PS0, PS1, IS0, IS1, &i_min[1], &i_max[1]); + shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, + &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, + &iy_min, &iy_max, + i2(p), dx2(p), + i2_prev(p), dx2_prev(p)); /* z - direction */ - // shape function in z direction - vec_t S0z { ZERO }; - vec_t S1z { ZERO }; + // shape function at previous timestep + real_t S0z_0, S0z_1, S0z_2, S0z_3; + // shape function at current timestep + real_t S1z_0, S1z_1, S1z_2, S1z_3; + // indices of the shape function + uint iz_min, iz_max; // find indices and define shape function - find_indices_and_PS(i3(p), IS0, PS0); - find_indices_and_PS(i3_prev(p), IS1, PS1); - - // apply shape function - apply_shape_function(S0z, S1z, PS0, PS1, IS0, IS1, &i_min[2], &i_max[2]); - - // Calculate weight function - for (int i = 0; i < interp_order + 2; ++i) { - for (int j = 0; j < interp_order + 2; ++j) { - for (int k = 0; k < interp_order + 2; ++k) { - // Esirkepov 2001, Eq. 
31 - Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * - ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + - HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); - - Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * - ( S0x[i] * S0z[k] + S1x[i] * S1z[k] + - HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); - - Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * - (S0x[i] * S0y[j] + S1x[i] * S1y[j] + - HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); - } - } - } + shape_function(&Sz0_0, &Sz0_1, &Sz0_2, &Sz0_3, + &Sz1_0, &Sz1_1, &Sz1_2, &Sz1_3, + &iz_min, &iz_max, + i3(p), dx3(p), + i3_prev(p), dx3_prev(p)); + + // // Calculate weight function + // for (int i = 0; i < interp_order + 2; ++i) { + // for (int j = 0; j < interp_order + 2; ++j) { + // for (int k = 0; k < interp_order + 2; ++k) { + // // Esirkepov 2001, Eq. 31 + // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * + // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + + // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); + + // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * + // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + + // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); + + // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * + // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + + // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + // } + // } + // } + + // Unrolled calculations for Wx, Wy, and Wz + const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_1 + S1y_1 * 
S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 
* S1z_0)); + const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_2_1_0 = THIRD 
* (S1x_2 - S0x_2) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_3_0_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_3 + 
S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); // ToDo: actual J update } From dfc7165c586c0d415ac8dc521373dac3905503f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 10 May 2025 19:48:27 -0500 Subject: [PATCH 009/154] moved interpolation order from variable to compiler directive --- src/kernels/currents_deposit.hpp | 1285 +++++++++++++++--------------- 1 file changed, 639 insertions(+), 646 
deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 841392f55..24dcf17e5 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -45,20 +45,19 @@ namespace kernel { const int interpolation_order; private: - Inline void shape_function(real_t* S0_0, - real_t* S0_1, - real_t* S0_2, - real_t* S0_3, - real_t* S1_0, - real_t* S1_1, - real_t* S1_2, - real_t* S1_3, - int* i_min, - int* const i_max int_t i, - const real_t dx, - const int_t i_prev, - const real_t dx_prev) { + real_t* S0_1, + real_t* S0_2, + real_t* S0_3, + real_t* S1_0, + real_t* S1_1, + real_t* S1_2, + real_t* S1_3, + int* i_min, + int* const i_max int_t i, + const real_t dx, + const int_t i_prev, + const real_t dx_prev) { /* Shape function per particle is a 4 element array. @@ -168,8 +167,7 @@ namespace kernel { const array_t& tag, const M& metric, real_t charge, - const real_t dt, - int interpolation_order) + const real_t dt) : J { scatter_cur } , i1 { i1 } , i2 { i2 } @@ -191,8 +189,7 @@ namespace kernel { , tag { tag } , metric { metric } , charge { charge } - , inv_dt { ONE / dt } - , interpolation_order { interpolation_order } {} + , inv_dt { ONE / dt } {} /** * @brief Iteration of the loop over particles. 
@@ -246,665 +243,661 @@ namespace kernel { const real_t coeff { weight(p) * charge }; // ToDo: interpolation_order as parameter - if constexpr (interpolation_order == 0) { - /* - Zig-zag deposit - */ +#if (SHAPE_FUNCTION_ORDER == 1) + /* + Zig-zag deposit + */ - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * - (dx1(p) + dx1_prev(p)) * + const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * + (dx1(p) + dx1_prev(p)) * static_cast(INV_2) }; + + const real_t Wx1_1 { INV_2 * (dxp_r_1 + dx1_prev(p) + + static_cast(i1(p) > i1_prev(p))) }; + const real_t Wx1_2 { INV_2 * (dx1(p) + dxp_r_1 + + static_cast( + static_cast(i1(p) > i1_prev(p)) + + i1_prev(p) - i1(p))) }; + const real_t Fx1_1 { (static_cast(i1(p) > i1_prev(p)) + dxp_r_1 - + dx1_prev(p)) * + coeff * inv_dt }; + const real_t Fx1_2 { (static_cast( + i1(p) - i1_prev(p) - + static_cast(i1(p) > i1_prev(p))) + + dx1(p) - dxp_r_1) * + coeff * inv_dt }; + + auto J_acc = J.access(); + + // tuple_t dxp_r; + if constexpr (D == Dim::_1D) { + const real_t Fx2_1 { HALF * vp[1] * coeff }; + const real_t Fx2_2 { HALF * vp[1] * coeff }; + + const real_t Fx3_1 { HALF * vp[2] * coeff }; + const real_t Fx3_2 { HALF * vp[2] * coeff }; + + J_acc(i1_prev(p) + N_GHOSTS, cur::jx1) += Fx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx1) += Fx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, cur::jx2) += Fx2_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx2) += Fx2_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx2) += Fx2_2 * (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, cur::jx2) += Fx2_2 * Wx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * Wx1_2; + } else if constexpr (D == Dim::_2D || D == Dim::_3D) { + const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * + (dx2(p) + dx2_prev(p)) * static_cast(INV_2) }; - const real_t Wx1_1 { INV_2 * 
(dxp_r_1 + dx1_prev(p) + - static_cast(i1(p) > i1_prev(p))) }; - const real_t Wx1_2 { INV_2 * (dx1(p) + dxp_r_1 + + const real_t Wx2_1 { INV_2 * (dxp_r_2 + dx2_prev(p) + + static_cast(i2(p) > i2_prev(p))) }; + const real_t Wx2_2 { INV_2 * (dx2(p) + dxp_r_2 + static_cast( - static_cast(i1(p) > i1_prev(p)) + - i1_prev(p) - i1(p))) }; - const real_t Fx1_1 { (static_cast(i1(p) > i1_prev(p)) + - dxp_r_1 - dx1_prev(p)) * + static_cast(i2(p) > i2_prev(p)) + + i2_prev(p) - i2(p))) }; + const real_t Fx2_1 { (static_cast(i2(p) > i2_prev(p)) + + dxp_r_2 - dx2_prev(p)) * coeff * inv_dt }; - const real_t Fx1_2 { (static_cast( - i1(p) - i1_prev(p) - - static_cast(i1(p) > i1_prev(p))) + - dx1(p) - dxp_r_1) * + const real_t Fx2_2 { (static_cast( + i2(p) - i2_prev(p) - + static_cast(i2(p) > i2_prev(p))) + + dx2(p) - dxp_r_2) * coeff * inv_dt }; - auto J_acc = J.access(); - - // tuple_t dxp_r; - if constexpr (D == Dim::_1D) { - const real_t Fx2_1 { HALF * vp[1] * coeff }; - const real_t Fx2_2 { HALF * vp[1] * coeff }; - + if constexpr (D == Dim::_2D) { const real_t Fx3_1 { HALF * vp[2] * coeff }; const real_t Fx3_2 { HALF * vp[2] * coeff }; - J_acc(i1_prev(p) + N_GHOSTS, cur::jx1) += Fx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx1) += Fx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx2) += Fx2_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx2) += Fx2_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx2) += Fx2_2 * (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, cur::jx2) += Fx2_2 * Wx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * Wx1_2; - } else if constexpr (D == Dim::_2D || D == Dim::_3D) { - const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * - (dx2(p) + dx2_prev(p)) * + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * (ONE - Wx2_1); + J_acc(i1_prev(p) + 
N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * Wx2_1; + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx1) += Fx1_2 * + (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS + 1, cur::jx1) += Fx1_2 * Wx2_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * + (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * Wx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_2 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; + + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx3) += Fx3_2 * + (ONE - Wx1_2) * + (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; + J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * + Wx1_2 * + Wx2_2; + } else { + const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * + (dx3(p) + dx3_prev(p)) * static_cast(INV_2) }; - - const real_t Wx2_1 { INV_2 * (dxp_r_2 + dx2_prev(p) + - static_cast(i2(p) > i2_prev(p))) }; - const real_t Wx2_2 { INV_2 * (dx2(p) + dxp_r_2 + + const real_t Wx3_1 { INV_2 * (dxp_r_3 + dx3_prev(p) + + static_cast(i3(p) > i3_prev(p))) }; + const real_t Wx3_2 { INV_2 * (dx3(p) + dxp_r_3 + static_cast( - static_cast(i2(p) > i2_prev(p)) + - i2_prev(p) - i2(p))) }; - const real_t Fx2_1 { (static_cast(i2(p) > i2_prev(p)) + - dxp_r_2 - dx2_prev(p)) * + static_cast(i3(p) > i3_prev(p)) + + 
i3_prev(p) - i3(p))) }; + const real_t Fx3_1 { (static_cast(i3(p) > i3_prev(p)) + + dxp_r_3 - dx3_prev(p)) * coeff * inv_dt }; - const real_t Fx2_2 { (static_cast( - i2(p) - i2_prev(p) - - static_cast(i2(p) > i2_prev(p))) + - dx2(p) - dxp_r_2) * + const real_t Fx3_2 { (static_cast( + i3(p) - i3_prev(p) - + static_cast(i3(p) > i3_prev(p))) + + dx3(p) - dxp_r_3) * coeff * inv_dt }; - if constexpr (D == Dim::_2D) { - const real_t Fx3_1 { HALF * vp[2] * coeff }; - const real_t Fx3_2 { HALF * vp[2] * coeff }; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * Wx2_1; - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx1) += Fx1_2 * - (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS + 1, cur::jx1) += Fx1_2 * Wx2_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * - (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * Wx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_2 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_2 * 
Wx1_2 * Wx2_2; - } else { - const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * - (dx3(p) + dx3_prev(p)) * - static_cast(INV_2) }; - const real_t Wx3_1 { INV_2 * (dxp_r_3 + dx3_prev(p) + - static_cast(i3(p) > i3_prev(p))) }; - const real_t Wx3_2 { INV_2 * (dx3(p) + dxp_r_3 + - static_cast( - static_cast(i3(p) > i3_prev(p)) + - i3_prev(p) - i3(p))) }; - const real_t Fx3_1 { (static_cast(i3(p) > i3_prev(p)) + - dxp_r_3 - dx3_prev(p)) * - coeff * inv_dt }; - const real_t Fx3_2 { (static_cast( - i3(p) - i3_prev(p) - - static_cast(i3(p) > i3_prev(p))) + - dx3(p) - dxp_r_3) * - coeff * inv_dt }; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * (ONE - Wx2_1) * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * Wx2_1 * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * (ONE - Wx2_1) * Wx3_1; - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * Wx2_1 * Wx3_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx1) += Fx1_2 * (ONE - Wx2_2) * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx1) += Fx1_2 * Wx2_2 * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_2 * (ONE - Wx2_2) * Wx3_2; - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_2 * Wx2_2 * Wx3_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * (ONE - Wx1_1) * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * Wx1_1 * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_1 * (ONE - Wx1_1) * Wx3_1; - 
J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_1 * Wx1_1 * Wx3_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx2) += Fx2_2 * (ONE - Wx1_2) * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx2) += Fx2_2 * Wx1_2 * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_2 * (ONE - Wx1_2) * Wx3_2; - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_2 * Wx1_2 * Wx3_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_1 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; - } + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * (ONE - Wx2_1) * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * Wx2_1 * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * (ONE - Wx2_1) * Wx3_1; + 
J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * Wx2_1 * Wx3_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx1) += Fx1_2 * (ONE - Wx2_2) * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx1) += Fx1_2 * Wx2_2 * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_2 * (ONE - Wx2_2) * Wx3_2; + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_2 * Wx2_2 * Wx3_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * (ONE - Wx1_1) * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * Wx1_1 * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_1 * (ONE - Wx1_1) * Wx3_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_1 * Wx1_1 * Wx3_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx2) += Fx2_2 * (ONE - Wx1_2) * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx2) += Fx2_2 * Wx1_2 * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_2 * (ONE - Wx1_2) * Wx3_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_2 * Wx1_2 * Wx3_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_1 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; + 
J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; } - } else { - /* - Higher order charge conserving current deposition based on - Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract - - We need to define the follwowing variable: - - Shape functions in spatial directions for the particle position - before and after the current timestep. - S0_*, S1_* - - Density composition matrix - Wx_*, Wy_*, Wz_* - */ + } +#else // SHAPE_FUNCTION_ORDER + /* + Higher order charge conserving current deposition based on + Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract + + We need to define the follwowing variable: + - Shape functions in spatial directions for the particle position + before and after the current timestep. 
+ S0_*, S1_* + - Density composition matrix + Wx_*, Wy_*, Wz_* + */ + + /* + x - direction + */ + + // shape function at previous timestep + real_t S0x_0, S0x_1, S0x_2, S0x_3; + // shape function at current timestep + real_t S1x_0, S1x_1, S1x_2, S1x_3; + // indices of the shape function + uint ix_min, ix_max; + // find indices and define shape function + shape_function(&Sx0_0, &Sx0_1, &Sx0_2, &Sx0_3, + &Sx1_0, &Sx1_1, &Sx1_2, &Sx1_3, + &ix_min, &ix_max, + i1(p), dx1(p), + i1_prev(p), dx1_prev(p)); + + if constexpr (D == Dim::_1D) { + // ToDo + } else if constexpr (D == Dim::_2D) { /* - x - direction + y - direction */ // shape function at previous timestep - real_t S0x_0, S0x_1, S0x_2, S0x_3; + real_t S0y_0, S0y_1, S0y_2, S0y_3; // shape function at current timestep - real_t S1x_0, S1x_1, S1x_2, S1x_3; + real_t S1y_0, S1y_1, S1y_2, S1y_3; // indices of the shape function - uint ix_min, ix_max; + uint iy_min, iy_max; // find indices and define shape function - shape_function(&Sx0_0, &Sx0_1, &Sx0_2, &Sx0_3, - &Sx1_0, &Sx1_1, &Sx1_2, &Sx1_3, - &ix_min, &ix_max, - i1(p), dx1(p), - i1_prev(p), dx1_prev(p)); + shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, + &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, + &iy_min, &iy_max, + i2(p), dx2(p), + i2_prev(p), dx2_prev(p)); + + // Calculate weight function + // Unrolled calculations for Wx + const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); + const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); + const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); + const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); + + const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); + const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); + const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); + const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); + + const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); + const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); + const auto Wx_2_2 
= HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); + const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); + + const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); + const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); + const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); + const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); + + // Unrolled calculations for Wy + const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S0y_0 - S1y_0); + const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S0y_1 - S1y_1); + const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S0y_2 - S1y_2); + const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S0y_3 - S1y_3); + + const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S0y_0 - S1y_0); + const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S0y_1 - S1y_1); + const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S0y_2 - S1y_2); + const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S0y_3 - S1y_3); + + const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S0y_0 - S1y_0); + const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S0y_1 - S1y_1); + const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S0y_2 - S1y_2); + const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S0y_3 - S1y_3); + + const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S0y_0 - S1y_0); + const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S0y_1 - S1y_1); + const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); + const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S0y_3 - S1y_3); + + // Unrolled calculations for Wz + const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + + S0y_0 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + + S0y_1 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + + S0y_2 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + + S0y_3 * (HALF * S1x_0 + S0x_0)); + + const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + + S0y_0 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_1 = THIRD * 
(S1y_1 * (HALF * S0x_1 + S1x_1) + + S0y_1 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + + S0y_2 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + + S0y_3 * (HALF * S1x_1 + S0x_1)); + + const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + + S0y_0 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + + S0y_1 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + + S0y_2 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + + S0y_3 * (HALF * S1x_2 + S0x_2)); + + const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + + S0y_0 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_1 = THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + + S0y_1 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + + S0y_2 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + + S0y_3 * (HALF * S1x_3 + S0x_3)); + + // ToDo: check if this is what I need + const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * + (dx1(p) + dx1_prev(p)) * + static_cast(INV_2) }; - if constexpr (D == Dim::_1D) { - // ToDo - } - else if constexpr (D == Dim::_2D) { - - /* - y - direction - */ - - // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3; - // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3; - // indices of the shape function - uint iy_min, iy_max; - // find indices and define shape function - shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, - &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, - &iy_min, &iy_max, - i2(p), dx2(p), - i2_prev(p), dx2_prev(p)); - - // Calculate weight function - // Unrolled calculations for Wx - const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); - const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); - const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); - 
const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); - - const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); - const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); - const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); - const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); - - const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); - const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); - const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); - const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); - - const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); - const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); - const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); - const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); - - // Unrolled calculations for Wy - const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S0y_0 - S1y_0); - const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S0y_1 - S1y_1); - const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S0y_2 - S1y_2); - const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S0y_3 - S1y_3); - - const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S0y_0 - S1y_0); - const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S0y_1 - S1y_1); - const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S0y_2 - S1y_2); - const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S0y_3 - S1y_3); - - const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S0y_0 - S1y_0); - const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S0y_1 - S1y_1); - const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S0y_2 - S1y_2); - const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S0y_3 - S1y_3); - - const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S0y_0 - S1y_0); - const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S0y_1 - S1y_1); - const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); - const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S0y_3 - S1y_3); - - // Unrolled calculations for Wz - const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) 
+ - S0y_0 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + - S0y_1 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + - S0y_2 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + - S0y_3 * (HALF * S1x_0 + S0x_0)); - - const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + - S0y_0 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_1 = THIRD * (S1y_1 * (HALF * S0x_1 + S1x_1) + - S0y_1 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + - S0y_2 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + - S0y_3 * (HALF * S1x_1 + S0x_1)); - - const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + - S0y_0 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + - S0y_1 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + - S0y_2 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + - S0y_3 * (HALF * S1x_2 + S0x_2)); - - const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + - S0y_0 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_1 = THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + - S0y_1 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + - S0y_2 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + - S0y_3 * (HALF * S1x_3 + S0x_3)); - - // ToDo: check if this is what I need - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * - (dx1(p) + dx1_prev(p)) * - static_cast(INV_2) }; + const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * + (dx2(p) + dx2_prev(p)) * + static_cast(INV_2) }; - const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * - (dx2(p) + dx2_prev(p)) * - static_cast(INV_2) }; + // ToDo: actual J update + auto J_acc = J.access(); - // ToDo: actual J update - auto J_acc = 
J.access(); - - // Calculate weight function - for (int i = 0; i < interp_order + 2; ++i) { - for (int j = 0; j < interp_order + 2; ++j) { - // Esirkepov 2001, Eq. 39 - J_acc(N_GHOSTS + i_min[0] + i, - N_GHOSTS + i_min[1] + j, - cur::jx1) += coeff * inv_dt * Wx[i][j] * dxp_r_1; - } + // Calculate weight function + for (int i = 0; i < interp_order + 2; ++i) { + for (int j = 0; j < interp_order + 2; ++j) { + // Esirkepov 2001, Eq. 39 + J_acc(N_GHOSTS + i_min[0] + i, + N_GHOSTS + i_min[1] + j, + cur::jx1) += coeff * inv_dt * Wx[i][j] * dxp_r_1; } } - else if constexpr (D == Dim::_3D) { - /* - y - direction - */ - - // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3; - // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3; - // indices of the shape function - uint iy_min, iy_max; - // find indices and define shape function - shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, - &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, - &iy_min, &iy_max, - i2(p), dx2(p), - i2_prev(p), dx2_prev(p)); - - /* - z - direction - */ - - // shape function at previous timestep - real_t S0z_0, S0z_1, S0z_2, S0z_3; - // shape function at current timestep - real_t S1z_0, S1z_1, S1z_2, S1z_3; - // indices of the shape function - uint iz_min, iz_max; - // find indices and define shape function - shape_function(&Sz0_0, &Sz0_1, &Sz0_2, &Sz0_3, - &Sz1_0, &Sz1_1, &Sz1_2, &Sz1_3, - &iz_min, &iz_max, - i3(p), dx3(p), - i3_prev(p), dx3_prev(p)); - - // // Calculate weight function - // for (int i = 0; i < interp_order + 2; ++i) { - // for (int j = 0; j < interp_order + 2; ++j) { - // for (int k = 0; k < interp_order + 2; ++k) { - // // Esirkepov 2001, Eq. 
31 - // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * - // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + - // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); - - // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * - // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + - // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); - - // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * - // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + - // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); - // } - // } - // } - - // Unrolled calculations for Wx, Wy, and Wz - const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_3 + 
S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 
+ S0y_2 * S1z_2)); - const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_2_2_2 
= THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_3_0_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * 
S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - // ToDo: actual J update - } - }; + } else if constexpr (D == Dim::_3D) { + /* + y - direction + */ + // shape function at previous timestep + real_t S0y_0, S0y_1, S0y_2, S0y_3; + // shape function at current timestep + real_t S1y_0, S1y_1, S1y_2, S1y_3; + // indices of the shape function + uint iy_min, iy_max; + // find indices and define shape function + shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, + &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, + &iy_min, &iy_max, + i2(p), dx2(p), + i2_prev(p), dx2_prev(p)); + + /* + z - direction + */ + + // shape function at previous timestep + real_t S0z_0, S0z_1, S0z_2, S0z_3; + // shape function at current timestep + real_t S1z_0, S1z_1, S1z_2, S1z_3; + // indices of the shape function + uint iz_min, iz_max; + // find indices and define shape function + shape_function(&Sz0_0, &Sz0_1, &Sz0_2, &Sz0_3, + &Sz1_0, &Sz1_1, &Sz1_2, &Sz1_3, + &iz_min, &iz_max, + i3(p), dx3(p), + i3_prev(p), dx3_prev(p)); + + // // Calculate weight function + // for (int i = 0; i < interp_order + 2; ++i) { + // for (int j = 0; j < interp_order + 2; ++j) { + // for (int k = 0; k < interp_order + 2; ++k) { + // // Esirkepov 
2001, Eq. 31 + // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * + // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + + // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); + + // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * + // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + + // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); + + // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * + // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + + // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + // } + // } + // } + + // Unrolled calculations for Wx, Wy, and Wz + const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 
* S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * 
(S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const 
auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_3_0_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * 
+ ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + // ToDo: actual J update + }; +#endif // SHAPE_FUNCTION_ORDER } // namespace kernel #undef i_di_to_Xi From f1b8cd7c7732678b9a3d2690c1b13a1d39d88048 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 13 May 2025 14:43:45 -0500 Subject: [PATCH 010/154] first attempt at 2D current deposit with Esirkepov --- src/kernels/currents_deposit.hpp | 268 +++++++++++++++++++++---------- 1 file changed, 182 insertions(+), 86 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 24dcf17e5..b7a0cb698 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -45,19 +45,19 @@ namespace kernel { const int interpolation_order; private: - Inline void shape_function(real_t* S0_0, - real_t* S0_1, - real_t* S0_2, - real_t* S0_3, - real_t* S1_0, - real_t* S1_1, - real_t* S1_2, - real_t* S1_3, - int* i_min, - int* const i_max int_t i, - const real_t dx, - const int_t i_prev, - const real_t dx_prev) { + Inline void shape_function(real_t* S0_0, + real_t* S0_1, + real_t* S0_2, + real_t* S0_3, + real_t* S1_0, + real_t* S1_1, + real_t* S1_2, + 
real_t* S1_3, + int* i_min, + const index_t i, + const real_t dx, + const index_t i_prev, + const real_t dx_prev) { /* Shape function per particle is a 4 element array. @@ -89,13 +89,12 @@ namespace kernel { | | x | x* | x* | * | // shift_i = 1 |______|______|______|______|______| */ - ix_min = i_prev - 2; - ix_max = i + 2; + i_min = i_prev - 2 + N_GHOSTS; // shape function, ToDo: fix - S0_0 = HALF * SQR(HALF + dx_prev); - S0_1 = static_cast(0.75) - SQR(dx_prev); - S0_2 = HALF * SQR(HALF - dx_prev); - S0_3 = ZERO; + S0_0 = HALF * SQR(HALF + dx_prev); + S0_1 = static_cast(0.75) - SQR(dx_prev); + S0_2 = HALF * SQR(HALF - dx_prev); + S0_3 = ZERO; S1_0 = ZERO; S1_1 = HALF * SQR(HALF + dx); @@ -108,13 +107,12 @@ namespace kernel { | * | x* | x* | x | | // shift_i = -1 |______|______|______|______|______| */ - ix_min = i - 2; - ix_max = i_prev + 2; + i_min = i - 2 + N_GHOSTS; // shape function, ToDo: fix - S0_0 = ZERO; - S0_1 = HALF * SQR(HALF + dx_prev); - S0_2 = static_cast(0.75) - SQR(dx_prev); - S0_3 = HALF * SQR(HALF - dx_prev); + S0_0 = ZERO; + S0_1 = HALF * SQR(HALF + dx_prev); + S0_2 = static_cast(0.75) - SQR(dx_prev); + S0_3 = HALF * SQR(HALF - dx_prev); S1_0 = HALF * SQR(HALF + dx); S1_1 = static_cast(0.75) - SQR(dx); @@ -127,13 +125,12 @@ namespace kernel { | | x* | x* | x* | | // shift_i = 0 |______|______|______|______|______| */ - ix_min = i - 2; - ix_max = i + 2; + i_min = i - 2 + N_GHOSTS; // shape function, ToDo: fix - S0_0 = HALF * SQR(HALF + dx_prev); - S0_1 = static_cast(0.75) - SQR(dx_prev); - S0_2 = HALF * SQR(HALF - dx_prev); - S0_3 = ZERO; + S0_0 = HALF * SQR(HALF + dx_prev); + S0_1 = static_cast(0.75) - SQR(dx_prev); + S0_2 = HALF * SQR(HALF - dx_prev); + S0_3 = ZERO; S1_0 = HALF * SQR(HALF + dx); S1_1 = static_cast(0.75) - SQR(dx); @@ -497,17 +494,25 @@ namespace kernel { */ // shape function at previous timestep - real_t S0x_0, S0x_1, S0x_2, S0x_3; + real_t S0x_0, S0x_1, S0x_2, S0x_3; // shape function at current timestep - real_t S1x_0, S1x_1, 
S1x_2, S1x_3; + real_t S1x_0, S1x_1, S1x_2, S1x_3; // indices of the shape function - uint ix_min, ix_max; + ncells_t ix_min; // find indices and define shape function - shape_function(&Sx0_0, &Sx0_1, &Sx0_2, &Sx0_3, - &Sx1_0, &Sx1_1, &Sx1_2, &Sx1_3, - &ix_min, &ix_max, - i1(p), dx1(p), - i1_prev(p), dx1_prev(p)); + shape_function(&Sx0_0, + &Sx0_1, + &Sx0_2, + &Sx0_3, + &Sx1_0, + &Sx1_1, + &Sx1_2, + &Sx1_3, + &ix_min, + i1(p), + dx1(p), + i1_prev(p), + dx1_prev(p)); if constexpr (D == Dim::_1D) { // ToDo @@ -518,17 +523,25 @@ namespace kernel { */ // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3; + real_t S0y_0, S0y_1, S0y_2, S0y_3; // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3; + real_t S1y_0, S1y_1, S1y_2, S1y_3; // indices of the shape function - uint iy_min, iy_max; + ncells_t iy_min; // find indices and define shape function - shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, - &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, - &iy_min, &iy_max, - i2(p), dx2(p), - i2_prev(p), dx2_prev(p)); + shape_function(&Sy0_0, + &Sy0_1, + &Sy0_2, + &Sy0_3, + &Sy1_0, + &Sy1_1, + &Sy1_2, + &Sy1_3, + &iy_min, + i2(p), + dx2(p), + i2_prev(p), + dx2_prev(p)); // Calculate weight function // Unrolled calculations for Wx @@ -622,15 +635,80 @@ namespace kernel { // ToDo: actual J update auto J_acc = J.access(); - // Calculate weight function - for (int i = 0; i < interp_order + 2; ++i) { - for (int j = 0; j < interp_order + 2; ++j) { - // Esirkepov 2001, Eq. 39 - J_acc(N_GHOSTS + i_min[0] + i, - N_GHOSTS + i_min[1] + j, - cur::jx1) += coeff * inv_dt * Wx[i][j] * dxp_r_1; - } - } + // Esirkepov 2001, Eq. 
39 + /* + x - component + */ + const real_t Qdxdt = coeff * inv_dt * dxp_r_1; + J_acc(ix_min, iy_min, cur::jx1) += Qdxdt * Wx_0_0; + J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; + J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; + J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; + + /* + y - component + */ + const real_t Qdydt = coeff * inv_dt * dyp_r_1; + J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; + J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; + J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; + J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; + 
J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; + + + /* + z - component, simulated direction + */ + const real_t QVz = vp[2] * coeff; + J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; + J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; + J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; + J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; + } else if constexpr (D == Dim::_3D) { /* y - direction @@ -643,11 +721,20 @@ namespace kernel { // indices of the shape function uint iy_min, iy_max; // find indices and define shape function - shape_function(&Sy0_0, &Sy0_1, &Sy0_2, &Sy0_3, - &Sy1_0, &Sy1_1, &Sy1_2, &Sy1_3, - &iy_min, &iy_max, - i2(p), dx2(p), - i2_prev(p), dx2_prev(p)); + shape_function(&Sy0_0, + &Sy0_1, + &Sy0_2, + &Sy0_3, + &Sy1_0, + &Sy1_1, + &Sy1_2, + &Sy1_3, + &iy_min, + &iy_max, + i2(p), + dx2(p), + i2_prev(p), + dx2_prev(p)); /* z - direction @@ -660,31 +747,40 @@ namespace kernel { // indices of the shape function uint iz_min, iz_max; // find indices and define shape function - shape_function(&Sz0_0, &Sz0_1, &Sz0_2, &Sz0_3, - &Sz1_0, &Sz1_1, &Sz1_2, &Sz1_3, - &iz_min, &iz_max, - i3(p), dx3(p), - i3_prev(p), dx3_prev(p)); - - // // Calculate weight function - // for (int i = 0; i < interp_order + 2; ++i) { - // for (int j = 0; j < interp_order + 
2; ++j) { - // for (int k = 0; k < interp_order + 2; ++k) { - // // Esirkepov 2001, Eq. 31 - // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * - // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + - // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); - - // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * - // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + - // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); - - // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * - // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + - // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); - // } - // } - // } + shape_function(&Sz0_0, + &Sz0_1, + &Sz0_2, + &Sz0_3, + &Sz1_0, + &Sz1_1, + &Sz1_2, + &Sz1_3, + &iz_min, + &iz_max, + i3(p), + dx3(p), + i3_prev(p), + dx3_prev(p)); + + // Calculate weight function + for (int i = 0; i < interp_order + 2; ++i) { + for (int j = 0; j < interp_order + 2; ++j) { + for (int k = 0; k < interp_order + 2; ++k) { + // Esirkepov 2001, Eq. 31 + Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * + ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + + HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); + + Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * + (S0x[i] * S0z[k] + S1x[i] * S1z[k] + + HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); + + Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * + (S0x[i] * S0y[j] + S1x[i] * S1y[j] + + HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + } + } + } // Unrolled calculations for Wx, Wy, and Wz const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * From cb56279be440121cef766c4a704970e0810b107c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 13 May 2025 14:50:36 -0500 Subject: [PATCH 011/154] more local calculation of weight functions --- src/kernels/currents_deposit.hpp | 137 ++++++++++++++++--------------- 1 file changed, 70 insertions(+), 67 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index b7a0cb698..2cfd679ad 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -543,8 +543,25 @@ namespace kernel { i2_prev(p), 
dx2_prev(p)); - // Calculate weight function - // Unrolled calculations for Wx + + + // ToDo: check if this is what I need + const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * + (dx1(p) + dx1_prev(p)) * + static_cast(INV_2) }; + + const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * + (dx2(p) + dx2_prev(p)) * + static_cast(INV_2) }; + + // ToDo: actual J update + auto J_acc = J.access(); + + // Esirkepov 2001, Eq. 39 + /* + x - component + */ + // Calculate weight function - unrolled const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); @@ -565,6 +582,31 @@ namespace kernel { const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); + const real_t Qdxdt = coeff * inv_dt * dxp_r_1; + + J_acc(ix_min, iy_min, cur::jx1) += Qdxdt * Wx_0_0; + J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; + J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; + J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; + + /* + y - component + */ // Unrolled calculations for Wy const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S0y_0 - S1y_0); const auto Wy_0_1 = HALF * 
(S1x_0 + S0x_0) * (S0y_1 - S1y_1); @@ -586,6 +628,32 @@ namespace kernel { const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S0y_3 - S1y_3); + const real_t Qdydt = coeff * inv_dt * dyp_r_1; + + J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; + J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; + J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; + J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; + + + /* + z - component, simulated direction + */ // Unrolled calculations for Wz const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + S0y_0 * (HALF * S1x_0 + S0x_0)); @@ -623,71 +691,6 @@ namespace kernel { const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + S0y_3 * (HALF * S1x_3 + S0x_3)); - // ToDo: check if this is what I need - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * - (dx1(p) + dx1_prev(p)) * - static_cast(INV_2) }; - - const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * - (dx2(p) + dx2_prev(p)) * - static_cast(INV_2) }; - - // ToDo: actual J update - auto J_acc = J.access(); - - // Esirkepov 2001, Eq. 
39 - /* - x - component - */ - const real_t Qdxdt = coeff * inv_dt * dxp_r_1; - J_acc(ix_min, iy_min, cur::jx1) += Qdxdt * Wx_0_0; - J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; - J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; - J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; - - /* - y - component - */ - const real_t Qdydt = coeff * inv_dt * dyp_r_1; - J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; - J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; - J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; - J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; - 
J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; - - - /* - z - component, simulated direction - */ const real_t QVz = vp[2] * coeff; J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; From 9f50dea3ac9c8902ba6331052245f7db0a7a5121 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 13 May 2025 14:54:40 -0500 Subject: [PATCH 012/154] switch to row-major order --- src/kernels/currents_deposit.hpp | 86 ++++++++++++++++---------------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 2cfd679ad..81fafca54 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -543,8 +543,6 @@ namespace kernel { i2_prev(p), dx2_prev(p)); - - // ToDo: check if this is what I need const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * (dx1(p) + dx1_prev(p)) * @@ -582,26 +580,26 @@ namespace kernel { const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); - const real_t Qdxdt = coeff * inv_dt * dxp_r_1; - + const real_t Qdxdt = coeff * inv_dt * dxp_r_1; + J_acc(ix_min, iy_min, cur::jx1) += Qdxdt * Wx_0_0; - J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; - J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; - J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; + J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; + J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; + J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; - J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; + J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; J_acc(ix_min + 2, iy_min + 1, 
cur::jx1) += Qdxdt * Wx_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; + + J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; + J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; + + J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; + J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; + J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; /* @@ -628,28 +626,27 @@ namespace kernel { const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S0y_3 - S1y_3); - const real_t Qdydt = coeff * inv_dt * dyp_r_1; - + const real_t Qdydt = coeff * inv_dt * dyp_r_1; + J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; - J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; - J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; - J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; + J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; + J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; + J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; - J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; + J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * 
Wy_3_0; J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; + + J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; + J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; + J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; + J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; + J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; /* z - component, simulated direction @@ -691,25 +688,26 @@ namespace kernel { const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + S0y_3 * (HALF * S1x_3 + S0x_3)); - const real_t QVz = vp[2] * coeff; + const real_t QVz = vp[2] * coeff; + J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; - J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; - J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; - J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; + J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; + J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; + J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; - J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; + J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; + + J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; + J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; + J_acc(ix_min + 
2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; + + J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; + J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; + J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; } else if constexpr (D == Dim::_3D) { From 6b32791b27146f5d82826de35d71d94bdee85afb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 13 May 2025 15:04:56 -0500 Subject: [PATCH 013/154] first attempt at current deposit jx1 in 3D --- src/kernels/currents_deposit.hpp | 91 ++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 4 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 81fafca54..8e6be3d92 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -720,7 +720,7 @@ namespace kernel { // shape function at current timestep real_t S1y_0, S1y_1, S1y_2, S1y_3; // indices of the shape function - uint iy_min, iy_max; + uint iy_min; // find indices and define shape function shape_function(&Sy0_0, &Sy0_1, @@ -731,7 +731,6 @@ namespace kernel { &Sy1_2, &Sy1_3, &iy_min, - &iy_max, i2(p), dx2(p), i2_prev(p), @@ -746,7 +745,7 @@ namespace kernel { // shape function at current timestep real_t S1z_0, S1z_1, S1z_2, S1z_3; // indices of the shape function - uint iz_min, iz_max; + uint iz_min; // find indices and define shape function shape_function(&Sz0_0, &Sz0_1, @@ -757,7 +756,6 @@ namespace kernel { &Sz1_2, &Sz1_3, &iz_min, - &iz_max, i3(p), dx3(p), i3_prev(p), @@ -992,6 +990,91 @@ namespace kernel { ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + const real_t Qdxdt = coeff * inv_dt * dxp_r_1; + + J_acc(ix_min, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_0_0_0; + J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; + J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; + J_acc(ix_min + 3, 
iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; + // + J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; + J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_2_1_0; + J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_3_1_0; + // + J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_0_2_0; + J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_1_2_0; + J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_2_2_0; + J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_3_2_0; + // + J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_0_3_0; + J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_1_3_0; + J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_2_3_0; + J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; + // + // + J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; + J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_0_1; + J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_0_1; + J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_0_1; + // + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_1_1; + J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_1_1; + J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_1_1; + J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_1_1; + // + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; + J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_2_1; + J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_2_1; + J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_2_1; + // + J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; + J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_3_1; + J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) 
+= Qdxdt * Wx_2_3_1; + J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_3_1; + // + // + J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; + J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_0_2; + J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_0_2; + // + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; + J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_1_2; + J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_1_2; + J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_1_2; + // + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; + J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_2_2; + J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_2_2; + J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_2_2; + // + J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; + J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_3_2; + J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_3_2; + J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_3_2; + // + // + J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; + J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_0_3; + J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_0_3; + J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_0_3; + // + J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; + J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_1_3; + J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_1_3; + J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_1_3; + // + J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; + J_acc(ix_min + 1, iy_min + 2, iz_min + 3, 
cur::jx1) += Qdxdt * Wx_1_2_3; + J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_2_3; + J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_2_3; + // + J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; + J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_3_3; + J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_3_3; + J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_3_3; + // ToDo: actual J update }; #endif // SHAPE_FUNCTION_ORDER From bf788a9a88fbde5827a500d2ad49aa5fc9e7ea4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 14 May 2025 18:06:32 -0500 Subject: [PATCH 014/154] more efficient memory access (this time for real) --- src/kernels/currents_deposit.hpp | 122 +++++++++++++++---------------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 8e6be3d92..78e02f0ea 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -691,23 +691,23 @@ namespace kernel { const real_t QVz = vp[2] * coeff; J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; - J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; - J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; - J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; + J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; + J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; + J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; - J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; + J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; - J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; - J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; - - J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; - J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; - 
J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; - - J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; } else if constexpr (D == Dim::_3D) { @@ -993,86 +993,86 @@ namespace kernel { const real_t Qdxdt = coeff * inv_dt * dxp_r_1; J_acc(ix_min, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_0_0_0; - J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; - J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; - J_acc(ix_min + 3, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; + J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; + J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; + J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; // - J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; - J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_2_1_0; - J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_3_1_0; + J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_1_1; + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; + J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; // J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_0_2_0; - J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_1_2_0; - J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_2_2_0; - 
J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_3_2_0; + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; + J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; // J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_0_3_0; - J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_1_3_0; - J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_2_3_0; - J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; + J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; + J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; + J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; // // - J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; + J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_0_1; - J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_0_1; - J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_0_1; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; + J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_0_3; // - J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_1_1; + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_1_1; - J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_1_1; - J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_1_1; + J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_1_2; + J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_1_3; // - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; + J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_1_2_0; J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * 
Wx_1_2_1; - J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_2_1; - J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_2_1; + J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_2_2; + J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_2_3; // - J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; + J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt* Wx_1_3_0; J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_3_1; - J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_3_1; - J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_3_1; + J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_3_2; + J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_3_3; // // - J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; + J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; + J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_0_1; J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_0_2; - J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_0_2; + J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_0_3; // - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; - J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_1_2; + J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_2_1_0; + J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_1_1; J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_1_2; - J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_1_2; + J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_1_3; // - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; - J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_2_2; + 
J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_2_2_0; + J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_2_1; J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_2_2; - J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_2_2; + J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_2_3; // - J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; - J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_3_2; + J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_2_3_0; + J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_3_1; J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_3_2; - J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_3_2; + J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_3_3; // // - J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; - J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_0_3; - J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_0_3; + J_acc(ix_min + 3, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; + J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_0_1; + J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_0_2; J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_0_3; // - J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; - J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_1_3; - J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_1_3; + J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_3_1_0; + J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_1_1; + J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_1_2; J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_1_3; // - J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; - J_acc(ix_min + 1, iy_min + 
2, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_2_3; - J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_2_3; + J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_3_2_0; + J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_2_1; + J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_2_2; J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_2_3; // - J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; - J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_3_3; - J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_3_3; + J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; + J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_3_1; + J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_3_2; J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_3_3; // ToDo: actual J update From b6a2811c8e333a745e6c9d13e49d00400b0f52fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 14 May 2025 18:35:49 -0500 Subject: [PATCH 015/154] first attempt at y/z deposit in 3D --- src/kernels/currents_deposit.hpp | 711 ++++++++++++++++++++++++++++--- 1 file changed, 652 insertions(+), 59 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 78e02f0ea..a99241c33 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -500,14 +500,14 @@ namespace kernel { // indices of the shape function ncells_t ix_min; // find indices and define shape function - shape_function(&Sx0_0, - &Sx0_1, - &Sx0_2, - &Sx0_3, - &Sx1_0, - &Sx1_1, - &Sx1_2, - &Sx1_3, + shape_function(&S0x_0, + &S0x_1, + &S0x_2, + &S0x_3, + &S1x_0, + &S1x_1, + &S1x_2, + &S1x_3, &ix_min, i1(p), dx1(p), @@ -529,14 +529,14 @@ namespace kernel { // indices of the shape function ncells_t iy_min; // find indices and define shape function - shape_function(&Sy0_0, - &Sy0_1, - 
&Sy0_2, - &Sy0_3, - &Sy1_0, - &Sy1_1, - &Sy1_2, - &Sy1_3, + shape_function(&S0y_0, + &S0y_1, + &S0y_2, + &S0y_3, + &S1y_0, + &S1y_1, + &S1y_2, + &S1y_3, &iy_min, i2(p), dx2(p), @@ -695,7 +695,7 @@ namespace kernel { J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; - J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; + J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; @@ -722,14 +722,14 @@ namespace kernel { // indices of the shape function uint iy_min; // find indices and define shape function - shape_function(&Sy0_0, - &Sy0_1, - &Sy0_2, - &Sy0_3, - &Sy1_0, - &Sy1_1, - &Sy1_2, - &Sy1_3, + shape_function(&S0y_0, + &S0y_1, + &S0y_2, + &S0y_3, + &S1y_0, + &S1y_1, + &S1y_2, + &S1y_3, &iy_min, i2(p), dx2(p), @@ -747,14 +747,14 @@ namespace kernel { // indices of the shape function uint iz_min; // find indices and define shape function - shape_function(&Sz0_0, - &Sz0_1, - &Sz0_2, - &Sz0_3, - &Sz1_0, - &Sz1_1, - &Sz1_2, - &Sz1_3, + shape_function(&S0z_0, + &S0z_1, + &S0z_2, + &S0z_3, + &S1z_0, + &S1z_1, + &S1z_2, + &S1z_3, &iz_min, i3(p), dx3(p), @@ -992,33 +992,33 @@ namespace kernel { const real_t Qdxdt = coeff * inv_dt * dxp_r_1; - J_acc(ix_min, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_0_0_0; - J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; - J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; - J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; + J_acc(ix_min, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_0_0_0; + J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; + J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; + J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; // - J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; - J_acc(ix_min, iy_min + 
1, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_1_1; - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; - J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; + J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_1_1; + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; + J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; // - J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_0_2_0; - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; - J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; + J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_0_2_0; + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; + J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; // - J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_0_3_0; - J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; - J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; - J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; + J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_0_3_0; + J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; + J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; + J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; // // - J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; - J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_0_1; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; - J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_0_3; + J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; + J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += 
Qdxdt * Wx_1_0_1; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; + J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_0_3; // - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_1_1; J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_1_2; J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_1_3; @@ -1028,13 +1028,13 @@ namespace kernel { J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_2_2; J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_2_3; // - J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt* Wx_1_3_0; + J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_1_3_0; J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_3_1; J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_3_2; J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_3_3; // // - J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; + J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_0_1; J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_0_2; J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_0_3; @@ -1055,7 +1055,7 @@ namespace kernel { J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_3_3; // // - J_acc(ix_min + 3, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; + J_acc(ix_min + 3, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_0_1; J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_0_2; J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_0_3; @@ -1070,12 +1070,605 @@ namespace kernel { J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * 
Wx_3_2_2; J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_2_3; // - J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; + J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_3_1; J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_3_2; J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_3_3; - // ToDo: actual J update + /* + y-component + */ + // i = 0 + const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_3 + S1x_0 * 
S1z_3 + + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_0_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_2_2 = THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto 
Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_1_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto Wy_1_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + 
+ HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_2_2 = 
THIRD * (S1y_2 - S0y_2) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const real_t Qdydt = coeff * inv_dt * dxp_r_2; + + J_acc(ix_min, iy_min, iz_min, cur::jx2) += Qdydt * Wy_0_0_0; + J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_0_0_1; + J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_0_0_2; + J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_0_0_3; + // + J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_0_1_0; + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_0_1_1; + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_0_1_2; + J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_0_1_3; + // + J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_0_2_0; + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_0_2_1; + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_0_2_2; + J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_0_2_3; + // + J_acc(ix_min, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_0_3_0; + J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_0_3_1; + J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_0_3_2; + J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_0_3_3; + // + // + J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += Qdydt * Wy_1_0_0; + 
J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_1_0_1; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_1_0_2; + J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_1_0_3; + // + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_1_1_0; + J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_1_1_1; + J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_1_1_2; + J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_1_1_3; + // + J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_1_2_0; + J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_1_2_1; + J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_1_2_2; + J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_1_2_3; + // + J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_1_3_0; + J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_1_3_1; + J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_1_3_2; + J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_1_3_3; + // + // + J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += Qdydt * Wy_2_0_0; + J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_2_0_1; + J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_2_0_2; + J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_2_0_3; + // + J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_2_1_0; + J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_2_1_1; + J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_2_1_2; + J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_2_1_3; + // + J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_2_2_0; + J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_2_2_1; + J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_2_2_2; + J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * 
Wy_2_2_3; + // + J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_2_3_0; + J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_2_3_1; + J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_2_3_2; + J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_2_3_3; + // + // + J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += Qdydt * Wy_3_0_0; + J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_3_0_1; + J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_3_0_2; + J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_3_0_3; + // + J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_3_1_0; + J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_3_1_1; + J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_3_1_2; + J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_3_1_3; + // + J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_3_2_0; + J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_3_2_1; + J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_3_2_2; + J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_3_2_3; + // + J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_3_3_0; + J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_3_3_1; + J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_3_3_2; + J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_3_3_3; + + /* + z - component + */ + const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + const auto Wz_0_0_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_0 * S0y_0 + S1x_0 * S1y_0 
+ + HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + + const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + const auto Wz_0_1_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + + const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + const auto Wz_0_2_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + + const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + const auto Wz_0_3_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + + // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 + const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_0 + S1x_1 * S1y_0 
+ + HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + const auto Wz_1_0_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + + const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + const auto Wz_1_1_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + + const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + const auto Wz_1_2_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + + const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + const auto Wz_1_3_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + + const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_0_2 = 
THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 + const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + const auto Wz_3_0_1 = 
THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + const auto Wz_3_0_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + + const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + const auto Wz_3_1_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + + const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + const auto Wz_3_2_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + + const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + const auto Wz_3_3_3 = THIRD * (S1z_3 - S0z_3) * + (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + + const real_t Qdzdt = coeff * inv_dt * dxp_r_3; + + J_acc(ix_min, iy_min, iz_min, cur::jx3) += 
Qdzdt * Wz_0_0_0; + J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_0_1; + J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_0_2; + J_acc(ix_min, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_0_3; + // + J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_0_1_0; + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_1_1; + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_1_2; + J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_1_3; + // + J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_0_2_0; + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_2_1; + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_2_2; + J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_2_3; + // + J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_0_3_0; + J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_3_1; + J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_3_2; + J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_3_3; + // + // + J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_1_0_0; + J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_0_1; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_0_2; + J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_0_3; + // + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_1_1_0; + J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_1_1; + J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_1_2; + J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_1_3; + // + J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_1_2_0; + J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_2_1; + J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_2_2; + J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_2_3; + // + J_acc(ix_min + 1, iy_min + 
3, iz_min, cur::jx3) += Qdzdt * Wz_1_3_0; + J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_3_1; + J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_3_2; + J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_3_3; + // + // + J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_2_0_0; + J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_0_1; + J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_0_2; + J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_0_3; + // + J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_2_1_0; + J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_1_1; + J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_1_2; + J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_1_3; + // + J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_2_2_0; + J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_2_1; + J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_2_2; + J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_2_3; + // + J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_2_3_0; + J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_3_1; + J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_3_2; + J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_3_3; + // + // + J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_3_0_0; + J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_0_1; + J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_0_2; + J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_0_3; + // + J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_3_1_0; + J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_1_1; + J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_1_2; + J_acc(ix_min + 3, 
iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_1_3; + // + J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_3_2_0; + J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_2_1; + J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_2_2; + J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_2_3; + // + J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_3_3_0; + J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_3_1; + J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_3_2; + J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_3_3; }; #endif // SHAPE_FUNCTION_ORDER } // namespace kernel From bbd2b3c2cce8ddcdba13d9e083d67a018acffd65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 15 May 2025 13:27:12 -0500 Subject: [PATCH 016/154] memory reorder --- src/kernels/currents_deposit.hpp | 48 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index a99241c33..789653eea 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -583,23 +583,23 @@ namespace kernel { const real_t Qdxdt = coeff * inv_dt * dxp_r_1; J_acc(ix_min, iy_min, cur::jx1) += Qdxdt * Wx_0_0; - J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; - J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; - J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; + J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; + J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; + J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; - J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; + J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; - J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; - J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; - - 
J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; - J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; - - J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; /* @@ -629,23 +629,23 @@ namespace kernel { const real_t Qdydt = coeff * inv_dt * dyp_r_1; J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; - J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; - J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; - J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; + J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; + J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; + J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; - J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; + J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; - J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; - - J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; - - J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; + + J_acc(ix_min + 2, 
iy_min, cur::jx2) += Qdydt * Wy_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; /* From 4f344eb73cdff2c8d537416d7e583854c5bbd0de Mon Sep 17 00:00:00 2001 From: hayk Date: Thu, 15 May 2025 17:14:49 -0400 Subject: [PATCH 017/154] esirkepov WIP --- extern/Kokkos | 2 +- input.example.toml | 49 +- src/engines/srpic.hpp | 63 +- src/framework/parameters.cpp | 12 +- src/framework/tests/comm_nompi.cpp | 2 - src/kernels/currents_deposit.hpp | 2929 ++++++++++++++-------------- src/kernels/faraday_mink.hpp | 160 +- src/kernels/tests/deposit.cpp | 42 +- src/kernels/tests/faraday_mink.cpp | 9 +- 9 files changed, 1670 insertions(+), 1598 deletions(-) diff --git a/extern/Kokkos b/extern/Kokkos index 175257a51..1b1383c60 160000 --- a/extern/Kokkos +++ b/extern/Kokkos @@ -1 +1 @@ -Subproject commit 175257a51ff29a0059ec48bcd233ee096b2c0438 +Subproject commit 1b1383c6001f3bfe9fe309ca923c2d786600cc79 diff --git a/input.example.toml b/input.example.toml index 629d4dac1..80ab3e59f 100644 --- a/input.example.toml +++ b/input.example.toml @@ -196,15 +196,35 @@ # @default: 0 current_filters = "" - [algorithms.toggles] - # Toggle for the field solver: - # @type bool + [algorithms.deposit] + # Enable the current deposition + # @type: bool # @default: true - fieldsolver = "" - # Toggle for the current deposition: - # @type bool + enable = "" + # Order of the particle shape function + # @type: int + # @default: 1 + order = "" + + # @TODO: fix fieldsolver params below + [algorithms.fieldsolver] + # Enable the EM fieldsolver + # @type: bool # @default: true - deposit = "" + enable = "" + # Yee - all 0.0 - default + # 1D + 
deltax = -0.065 + # 2D + deltay = -0.065 + betaxy = -0.065 + betayx = -0.065 + # 3D - not yet tested + deltaz = 0.0 + betaxz = 0.0 + betazx = 0.0 + betayz = 0.0 + betazy = 0.0 [algorithms.timestep] # Courant-Friedrichs-Lewy number: @@ -249,21 +269,6 @@ # @type: float: > 0 gamma_rad = "" - [algorithms.fieldsolver] - # Yee - all 0.0 - default - # 1D - deltax = -0.065 - # 2D - deltay = -0.065 - betaxy = -0.065 - betayx = -0.065 - # 3D - not yet tested - deltaz = 0.0 - betaxz = 0.0 - betazx = 0.0 - betayz = 0.0 - betazy = 0.0 - [particles] # Fiducial number of particles per cell: # @required diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 62cddd8d5..91c84f657 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -76,9 +76,9 @@ namespace ntt { void step_forward(timer::Timers& timers, domain_t& dom) override { const auto fieldsolver_enabled = m_params.template get( - "algorithms.toggles.fieldsolver"); + "algorithms.fieldsolver.enable"); const auto deposit_enabled = m_params.template get( - "algorithms.toggles.deposit"); + "algorithms.deposit.enable"); const auto clear_interval = m_params.template get( "particles.clear_interval"); @@ -203,7 +203,7 @@ namespace ntt { "algorithms.fieldsolver.betayz"); const auto betazy = m_params.template get( "algorithms.fieldsolver.betazy"); - real_t coeff1, coeff2; + real_t coeff1, coeff2; if constexpr (M::Dim == Dim::_2D) { coeff1 = dT / SQR(dx); coeff2 = dT; @@ -508,6 +508,7 @@ namespace ntt { void CurrentsDeposit(domain_t& domain) { auto scatter_cur = Kokkos::Experimental::create_scatter_view( domain.fields.cur); + auto shape_order = params.template get("algorithms.deposit.order"); for (auto& species : domain.species) { if ((species.pusher() == PrtlPusher::NONE) or (species.npart() == 0) or cmp::AlmostZero_host(species.charge())) { @@ -520,31 +521,37 @@ namespace ntt { species.npart(), (double)species.charge()), HERE); - Kokkos::parallel_for("CurrentsDeposit", - species.rangeActiveParticles(), - 
kernel::DepositCurrents_kernel( - scatter_cur, - species.i1, - species.i2, - species.i3, - species.i1_prev, - species.i2_prev, - species.i3_prev, - species.dx1, - species.dx2, - species.dx3, - species.dx1_prev, - species.dx2_prev, - species.dx3_prev, - species.ux1, - species.ux2, - species.ux3, - species.phi, - species.weight, - species.tag, - domain.mesh.metric, - (real_t)(species.charge()), - dt)); + if (shape_order == 1) { + // clang-format off + Kokkos::parallel_for("CurrentsDeposit", + species.rangeActiveParticles(), + kernel::DepositCurrents_kernel( + scatter_cur, + species.i1, species.i2, species.i3, + species.i1_prev, species.i2_prev, species.i3_prev, + species.dx1, species.dx2, species.dx3, + species.dx1_prev, species.dx2_prev, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.phi, species.weight, species.tag, + domain.mesh.metric, (real_t)(species.charge()), dt)); + // clang-format on + } else if (shape_order == 2) { + // clang-format off + Kokkos::parallel_for("CurrentsDeposit", + species.rangeActiveParticles(), + kernel::DepositCurrents_kernel( + scatter_cur, + species.i1, species.i2, species.i3, + species.i1_prev, species.i2_prev, species.i3_prev, + species.dx1, species.dx2, species.dx3, + species.dx1_prev, species.dx2_prev, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.phi, species.weight, species.tag, + domain.mesh.metric, (real_t)(species.charge()), dt)); + // clang-format on + } else { + raise::Error("Invalid shape order for current deposition", HERE); + } } Kokkos::Experimental::contribute(domain.fields.cur, scatter_cur); } diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index 6ec7f271b..77ea029f9 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -409,13 +409,15 @@ namespace ntt { "current_filters", defaults::current_filters)); - /* [algorithms.toggles] ------------------------------------------------- */ - set("algorithms.toggles.fieldsolver", - 
toml::find_or(toml_data, "algorithms", "toggles", "fieldsolver", true)); - set("algorithms.toggles.deposit", - toml::find_or(toml_data, "algorithms", "toggles", "deposit", true)); + /* [algorithms.deposit] ------------------------------------------------- */ + set("algorithms.deposit.enable", + toml::find_or(toml_data, "algorithms", "deposit", "enable", true)); + set("algorithms.deposit.order", + toml::find_or(toml_data, "algorithms", "deposit", "order", 1)); /* [algorithms.fieldsolver] --------------------------------------------- */ + set("algorithms.fieldsolver.enable", + toml::find_or(toml_data, "algorithms", "fieldsolver", "enable", true)); set("algorithms.fieldsolver.deltax", toml::find_or(toml_data, "algorithms", diff --git a/src/framework/tests/comm_nompi.cpp b/src/framework/tests/comm_nompi.cpp index f9581c1e1..c7646ef03 100644 --- a/src/framework/tests/comm_nompi.cpp +++ b/src/framework/tests/comm_nompi.cpp @@ -7,8 +7,6 @@ #include "arch/kokkos_aliases.h" #include "utils/numeric.h" -#include "framework/domain/comm_mpi.hpp" - #include #include diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 789653eea..5ef52bba4 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -26,7 +26,7 @@ namespace kernel { /** * @brief Algorithm for the current deposition */ - template + template class DepositCurrents_kernel { static_assert(M::is_metric, "M must be a metric class"); static constexpr auto D = M::Dim; @@ -42,23 +42,20 @@ namespace kernel { const array_t tag; const M metric; const real_t charge, inv_dt; - const int interpolation_order; - - private: - Inline void shape_function(real_t* S0_0, - real_t* S0_1, - real_t* S0_2, - real_t* S0_3, - real_t* S1_0, - real_t* S1_1, - real_t* S1_2, - real_t* S1_3, - int* i_min, - const index_t i, - const real_t dx, - const index_t i_prev, - const real_t dx_prev) { + Inline void shape_function_2nd(real_t& S0_0, + real_t& S0_1, + real_t& S0_2, + real_t& S0_3, + 
real_t& S1_0, + real_t& S1_1, + real_t& S1_2, + real_t& S1_3, + ncells_t& i_min, + const index_t& i, + const real_t& dx, + const index_t& i_prev, + const real_t& dx_prev) const { /* Shape function per particle is a 4 element array. We need to find which indices are contributing to the shape function @@ -79,7 +76,16 @@ namespace kernel { */ // find shift in indices - const auto shift_x { i_prev - i - (dx_prev - dx) }; + const auto dx_less_half = static_cast(dx < static_cast(0.5)); + const auto dx_prev_less_half = static_cast( + dx_prev < static_cast(0.5)); + const auto shift_x { (i - i_prev) - (dx_less_half - dx_prev_less_half) }; + + const real_t dx_prev_diff = static_cast(dx_prev) + + static_cast( + dx_prev < static_cast(0.5)); + const real_t dx_diff = static_cast(dx) + + static_cast(dx < static_cast(0.5)); // find indices and define shape function if (shift_x > 0) { @@ -89,17 +95,17 @@ namespace kernel { | | x | x* | x* | * | // shift_i = 1 |______|______|______|______|______| */ - i_min = i_prev - 2 + N_GHOSTS; - // shape function, ToDo: fix - S0_0 = HALF * SQR(HALF + dx_prev); - S0_1 = static_cast(0.75) - SQR(dx_prev); - S0_2 = HALF * SQR(HALF - dx_prev); - S0_3 = ZERO; + i_min = i_prev - dx_prev_less_half + N_GHOSTS; + + S0_0 = HALF * SQR(static_cast(1.5) - dx_prev_diff); + S0_1 = static_cast(0.75) - SQR(ONE - dx_prev_diff); + S0_2 = HALF * SQR(HALF - dx_prev_diff); + S0_3 = ZERO; S1_0 = ZERO; - S1_1 = HALF * SQR(HALF + dx); - S1_2 = static_cast(0.75) - SQR(dx); - S1_3 = HALF * SQR(HALF - dx); + S1_1 = HALF * SQR(static_cast(1.5) - dx_diff); + S1_2 = static_cast(0.75) - SQR(ONE - dx_diff); + S1_3 = HALF * SQR(HALF - dx_diff); } else if (shift_x < 0) { /* (-1) 0 1 2 3 @@ -107,16 +113,16 @@ namespace kernel { | * | x* | x* | x | | // shift_i = -1 |______|______|______|______|______| */ - i_min = i - 2 + N_GHOSTS; - // shape function, ToDo: fix - S0_0 = ZERO; - S0_1 = HALF * SQR(HALF + dx_prev); - S0_2 = static_cast(0.75) - SQR(dx_prev); - S0_3 = HALF * SQR(HALF 
- dx_prev); - - S1_0 = HALF * SQR(HALF + dx); - S1_1 = static_cast(0.75) - SQR(dx); - S1_2 = HALF * SQR(HALF - dx); + i_min = i - dx_less_half + N_GHOSTS; + + S0_0 = ZERO; + S0_1 = HALF * SQR(static_cast(1.5) - dx_prev_diff); + S0_2 = static_cast(0.75) - SQR(ONE - dx_prev_diff); + S0_3 = HALF * SQR(HALF - dx_prev_diff); + + S1_0 = HALF * SQR(static_cast(1.5) - dx_diff); + S1_1 = static_cast(0.75) - SQR(ONE - dx_diff); + S1_2 = HALF * SQR(HALF - dx_diff); S1_3 = ZERO; } else { /* @@ -125,16 +131,16 @@ namespace kernel { | | x* | x* | x* | | // shift_i = 0 |______|______|______|______|______| */ - i_min = i - 2 + N_GHOSTS; - // shape function, ToDo: fix - S0_0 = HALF * SQR(HALF + dx_prev); - S0_1 = static_cast(0.75) - SQR(dx_prev); - S0_2 = HALF * SQR(HALF - dx_prev); - S0_3 = ZERO; - - S1_0 = HALF * SQR(HALF + dx); - S1_1 = static_cast(0.75) - SQR(dx); - S1_2 = HALF * SQR(HALF - dx); + i_min = i - dx_less_half + N_GHOSTS; + + S0_0 = HALF * SQR(static_cast(1.5) - dx_prev_diff); + S0_1 = static_cast(0.75) - SQR(ONE - dx_prev_diff); + S0_2 = HALF * SQR(HALF - dx_prev_diff); + S0_3 = ZERO; + + S1_0 = HALF * SQR(static_cast(1.5) - dx_diff); + S1_1 = static_cast(0.75) - SQR(ONE - dx_diff); + S1_2 = HALF * SQR(HALF - dx_diff); S1_3 = ZERO; } } @@ -186,7 +192,12 @@ namespace kernel { , tag { tag } , metric { metric } , charge { charge } - , inv_dt { ONE / dt } {} + , inv_dt { ONE / dt } { + raise::ErrorIf( + (O == 2u and N_GHOSTS < 2), + "Order of interpolation is 2, but number of ghost cells is < 2", + HERE); + } /** * @brief Iteration of the loop over particles. 
@@ -240,1438 +251,1452 @@ namespace kernel { const real_t coeff { weight(p) * charge }; // ToDo: interpolation_order as parameter -#if (SHAPE_FUNCTION_ORDER == 1) - /* - Zig-zag deposit - */ + if constexpr (O == 1u) { + /* + Zig-zag deposit + */ - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * - (dx1(p) + dx1_prev(p)) * static_cast(INV_2) }; - - const real_t Wx1_1 { INV_2 * (dxp_r_1 + dx1_prev(p) + - static_cast(i1(p) > i1_prev(p))) }; - const real_t Wx1_2 { INV_2 * (dx1(p) + dxp_r_1 + - static_cast( - static_cast(i1(p) > i1_prev(p)) + - i1_prev(p) - i1(p))) }; - const real_t Fx1_1 { (static_cast(i1(p) > i1_prev(p)) + dxp_r_1 - - dx1_prev(p)) * - coeff * inv_dt }; - const real_t Fx1_2 { (static_cast( - i1(p) - i1_prev(p) - - static_cast(i1(p) > i1_prev(p))) + - dx1(p) - dxp_r_1) * - coeff * inv_dt }; - - auto J_acc = J.access(); - - // tuple_t dxp_r; - if constexpr (D == Dim::_1D) { - const real_t Fx2_1 { HALF * vp[1] * coeff }; - const real_t Fx2_2 { HALF * vp[1] * coeff }; - - const real_t Fx3_1 { HALF * vp[2] * coeff }; - const real_t Fx3_2 { HALF * vp[2] * coeff }; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx1) += Fx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx1) += Fx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx2) += Fx2_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx2) += Fx2_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx2) += Fx2_2 * (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, cur::jx2) += Fx2_2 * Wx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * Wx1_2; - } else if constexpr (D == Dim::_2D || D == Dim::_3D) { - const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * - (dx2(p) + dx2_prev(p)) * + const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * + (dx1(p) + dx1_prev(p)) * static_cast(INV_2) }; - const real_t Wx2_1 { INV_2 * (dxp_r_2 + 
dx2_prev(p) + - static_cast(i2(p) > i2_prev(p))) }; - const real_t Wx2_2 { INV_2 * (dx2(p) + dxp_r_2 + + const real_t Wx1_1 { INV_2 * (dxp_r_1 + dx1_prev(p) + + static_cast(i1(p) > i1_prev(p))) }; + const real_t Wx1_2 { INV_2 * (dx1(p) + dxp_r_1 + static_cast( - static_cast(i2(p) > i2_prev(p)) + - i2_prev(p) - i2(p))) }; - const real_t Fx2_1 { (static_cast(i2(p) > i2_prev(p)) + - dxp_r_2 - dx2_prev(p)) * + static_cast(i1(p) > i1_prev(p)) + + i1_prev(p) - i1(p))) }; + const real_t Fx1_1 { (static_cast(i1(p) > i1_prev(p)) + + dxp_r_1 - dx1_prev(p)) * coeff * inv_dt }; - const real_t Fx2_2 { (static_cast( - i2(p) - i2_prev(p) - - static_cast(i2(p) > i2_prev(p))) + - dx2(p) - dxp_r_2) * + const real_t Fx1_2 { (static_cast( + i1(p) - i1_prev(p) - + static_cast(i1(p) > i1_prev(p))) + + dx1(p) - dxp_r_1) * coeff * inv_dt }; - if constexpr (D == Dim::_2D) { + auto J_acc = J.access(); + + // tuple_t dxp_r; + if constexpr (D == Dim::_1D) { + const real_t Fx2_1 { HALF * vp[1] * coeff }; + const real_t Fx2_2 { HALF * vp[1] * coeff }; + const real_t Fx3_1 { HALF * vp[2] * coeff }; const real_t Fx3_2 { HALF * vp[2] * coeff }; - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * Wx2_1; - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx1) += Fx1_2 * - (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS + 1, cur::jx1) += Fx1_2 * Wx2_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * (ONE - Wx1_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * Wx1_1; - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * - (ONE - Wx1_2); - J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * Wx1_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - cur::jx3) 
+= Fx3_1 * Wx1_2 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; - - J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx3) += Fx3_2 * - (ONE - Wx1_2) * - (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; - J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * - Wx1_2 * - Wx2_2; - } else { - const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * - (dx3(p) + dx3_prev(p)) * + J_acc(i1_prev(p) + N_GHOSTS, cur::jx1) += Fx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx1) += Fx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, cur::jx2) += Fx2_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx2) += Fx2_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx2) += Fx2_2 * (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, cur::jx2) += Fx2_2 * Wx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, cur::jx3) += Fx3_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * Wx1_1; + J_acc(i1(p) + N_GHOSTS, cur::jx3) += Fx3_2 * (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, cur::jx3) += Fx3_2 * Wx1_2; + } else if constexpr (D == Dim::_2D || D == Dim::_3D) { + const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * + (dx2(p) + dx2_prev(p)) * static_cast(INV_2) }; - const real_t Wx3_1 { INV_2 * (dxp_r_3 + dx3_prev(p) + - static_cast(i3(p) > i3_prev(p))) }; - const real_t Wx3_2 { INV_2 * (dx3(p) + dxp_r_3 + + + const real_t Wx2_1 { INV_2 * (dxp_r_2 + dx2_prev(p) + + static_cast(i2(p) > i2_prev(p))) }; + const real_t Wx2_2 { INV_2 * (dx2(p) + dxp_r_2 + static_cast( - static_cast(i3(p) > i3_prev(p)) + - i3_prev(p) - i3(p))) }; - const real_t Fx3_1 { (static_cast(i3(p) > i3_prev(p)) + - dxp_r_3 - dx3_prev(p)) * + static_cast(i2(p) > i2_prev(p)) + + i2_prev(p) - i2(p))) 
}; + const real_t Fx2_1 { (static_cast(i2(p) > i2_prev(p)) + + dxp_r_2 - dx2_prev(p)) * coeff * inv_dt }; - const real_t Fx3_2 { (static_cast( - i3(p) - i3_prev(p) - - static_cast(i3(p) > i3_prev(p))) + - dx3(p) - dxp_r_3) * + const real_t Fx2_2 { (static_cast( + i2(p) - i2_prev(p) - + static_cast(i2(p) > i2_prev(p))) + + dx2(p) - dxp_r_2) * coeff * inv_dt }; - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * (ONE - Wx2_1) * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx1) += Fx1_1 * Wx2_1 * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * (ONE - Wx2_1) * Wx3_1; - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_1 * Wx2_1 * Wx3_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx1) += Fx1_2 * (ONE - Wx2_2) * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx1) += Fx1_2 * Wx2_2 * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_2 * (ONE - Wx2_2) * Wx3_2; - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS + 1, - cur::jx1) += Fx1_2 * Wx2_2 * Wx3_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * (ONE - Wx1_1) * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx2) += Fx2_1 * Wx1_1 * (ONE - Wx3_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_1 * (ONE - Wx1_1) * Wx3_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_1 * Wx1_1 * Wx3_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx2) += Fx2_2 * (ONE - Wx1_2) * (ONE - Wx3_2); 
- J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx2) += Fx2_2 * Wx1_2 * (ONE - Wx3_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_2 * (ONE - Wx1_2) * Wx3_2; - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS + 1, - cur::jx2) += Fx2_2 * Wx1_2 * Wx3_2; - - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_1 * (ONE - Wx2_1); - J_acc(i1_prev(p) + N_GHOSTS, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; - J_acc(i1_prev(p) + N_GHOSTS + 1, - i2_prev(p) + N_GHOSTS + 1, - i3_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; - - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); - J_acc(i1(p) + N_GHOSTS, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; - J_acc(i1(p) + N_GHOSTS + 1, - i2(p) + N_GHOSTS + 1, - i3(p) + N_GHOSTS, - cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; + if constexpr (D == Dim::_2D) { + const real_t Fx3_1 { HALF * vp[2] * coeff }; + const real_t Fx3_2 { HALF * vp[2] * coeff }; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * Wx2_1; + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx1) += Fx1_2 * + (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS + 1, cur::jx1) += Fx1_2 * Wx2_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * (ONE - Wx1_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * Wx1_1; + 
J_acc(i1(p) + N_GHOSTS, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * + (ONE - Wx1_2); + J_acc(i1(p) + N_GHOSTS + 1, i2(p) + N_GHOSTS, cur::jx2) += Fx2_2 * Wx1_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_2 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS + 1, + cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; + } else { + const auto dxp_r_3 { static_cast(i3(p) == i3_prev(p)) * + (dx3(p) + dx3_prev(p)) * + static_cast(INV_2) }; + const real_t Wx3_1 { INV_2 * (dxp_r_3 + dx3_prev(p) + + static_cast(i3(p) > i3_prev(p))) }; + const real_t Wx3_2 { INV_2 * (dx3(p) + dxp_r_3 + + static_cast( + static_cast(i3(p) > i3_prev(p)) + + i3_prev(p) - i3(p))) }; + const real_t Fx3_1 { (static_cast(i3(p) > i3_prev(p)) + + dxp_r_3 - dx3_prev(p)) * + coeff * inv_dt }; + const real_t Fx3_2 { (static_cast( + i3(p) - i3_prev(p) - + static_cast(i3(p) > i3_prev(p))) + + dx3(p) - dxp_r_3) * + coeff * inv_dt }; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * (ONE - Wx2_1) * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + cur::jx1) += Fx1_1 * Wx2_1 * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * (ONE - Wx2_1) * Wx3_1; + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + 
N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_1 * Wx2_1 * Wx3_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx1) += Fx1_2 * (ONE - Wx2_2) * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx1) += Fx1_2 * Wx2_2 * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_2 * (ONE - Wx2_2) * Wx3_2; + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS + 1, + cur::jx1) += Fx1_2 * Wx2_2 * Wx3_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * (ONE - Wx1_1) * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx2) += Fx2_1 * Wx1_1 * (ONE - Wx3_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_1 * (ONE - Wx1_1) * Wx3_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_1 * Wx1_1 * Wx3_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx2) += Fx2_2 * (ONE - Wx1_2) * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx2) += Fx2_2 * Wx1_2 * (ONE - Wx3_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_2 * (ONE - Wx1_2) * Wx3_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS + 1, + cur::jx2) += Fx2_2 * Wx1_2 * Wx3_2; + + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_1 * (ONE - Wx2_1); + J_acc(i1_prev(p) + N_GHOSTS, + i2_prev(p) + N_GHOSTS + 1, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; + J_acc(i1_prev(p) + N_GHOSTS + 1, + i2_prev(p) + N_GHOSTS + 
1, + i3_prev(p) + N_GHOSTS, + cur::jx3) += Fx3_1 * Wx1_1 * Wx2_1; + + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * (ONE - Wx2_2); + J_acc(i1(p) + N_GHOSTS, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * (ONE - Wx1_2) * Wx2_2; + J_acc(i1(p) + N_GHOSTS + 1, + i2(p) + N_GHOSTS + 1, + i3(p) + N_GHOSTS, + cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; + } } - } -#else // SHAPE_FUNCTION_ORDER - /* - Higher order charge conserving current deposition based on - Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract - - We need to define the follwowing variable: - - Shape functions in spatial directions for the particle position - before and after the current timestep. - S0_*, S1_* - - Density composition matrix - Wx_*, Wy_*, Wz_* - */ - - /* - x - direction - */ - - // shape function at previous timestep - real_t S0x_0, S0x_1, S0x_2, S0x_3; - // shape function at current timestep - real_t S1x_0, S1x_1, S1x_2, S1x_3; - // indices of the shape function - ncells_t ix_min; - // find indices and define shape function - shape_function(&S0x_0, - &S0x_1, - &S0x_2, - &S0x_3, - &S1x_0, - &S1x_1, - &S1x_2, - &S1x_3, - &ix_min, - i1(p), - dx1(p), - i1_prev(p), - dx1_prev(p)); - - if constexpr (D == Dim::_1D) { - // ToDo - } else if constexpr (D == Dim::_2D) { - + } else if constexpr (O == 2u) { /* - y - direction + Higher order charge conserving current deposition based on + Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract + + We need to define the follwowing variable: + - Shape functions in spatial directions for the particle position + before and after the current timestep. 
+ S0_*, S1_* + - Density composition matrix + Wx_*, Wy_*, Wz_* */ - // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3; - // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3; - // indices of the shape function - ncells_t iy_min; - // find indices and define shape function - shape_function(&S0y_0, - &S0y_1, - &S0y_2, - &S0y_3, - &S1y_0, - &S1y_1, - &S1y_2, - &S1y_3, - &iy_min, - i2(p), - dx2(p), - i2_prev(p), - dx2_prev(p)); - - // ToDo: check if this is what I need - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * - (dx1(p) + dx1_prev(p)) * - static_cast(INV_2) }; - - const auto dxp_r_2 { static_cast(i2(p) == i2_prev(p)) * - (dx2(p) + dx2_prev(p)) * - static_cast(INV_2) }; - - // ToDo: actual J update - auto J_acc = J.access(); - - // Esirkepov 2001, Eq. 39 - /* - x - component - */ - // Calculate weight function - unrolled - const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); - const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); - const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); - const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); - - const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); - const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); - const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); - const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); - - const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); - const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); - const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); - const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); - - const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); - const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); - const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); - const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); - - const real_t Qdxdt = coeff * inv_dt * dxp_r_1; - - J_acc(ix_min, iy_min, 
cur::jx1) += Qdxdt * Wx_0_0; - J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; - J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; - J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; - - /* - y - component - */ - // Unrolled calculations for Wy - const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S0y_0 - S1y_0); - const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S0y_1 - S1y_1); - const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S0y_2 - S1y_2); - const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S0y_3 - S1y_3); - - const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S0y_0 - S1y_0); - const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S0y_1 - S1y_1); - const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S0y_2 - S1y_2); - const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S0y_3 - S1y_3); - - const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S0y_0 - S1y_0); - const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S0y_1 - S1y_1); - const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S0y_2 - S1y_2); - const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S0y_3 - S1y_3); - - const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S0y_0 - S1y_0); - const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S0y_1 - S1y_1); - const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); - const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) 
* (S0y_3 - S1y_3); - - const real_t Qdydt = coeff * inv_dt * dyp_r_1; - - J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; - J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; - J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; - J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; - - /* - z - component, simulated direction - */ - // Unrolled calculations for Wz - const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + - S0y_0 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + - S0y_1 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + - S0y_2 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + - S0y_3 * (HALF * S1x_0 + S0x_0)); - - const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + - S0y_0 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_1 = THIRD * (S1y_1 * (HALF * S0x_1 + S1x_1) + - S0y_1 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + - S0y_2 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + - S0y_3 * (HALF * S1x_1 + S0x_1)); - - const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + - S0y_0 * (HALF * S1x_2 + S0x_2)); 
- const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + - S0y_1 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + - S0y_2 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + - S0y_3 * (HALF * S1x_2 + S0x_2)); - - const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + - S0y_0 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_1 = THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + - S0y_1 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + - S0y_2 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + - S0y_3 * (HALF * S1x_3 + S0x_3)); - - const real_t QVz = vp[2] * coeff; - - J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; - J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; - J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; - J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; - - } else if constexpr (D == Dim::_3D) { - /* - y - direction - */ - - // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3; - // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3; - // indices of the shape function - uint iy_min; - // find indices and define shape function - shape_function(&S0y_0, - 
&S0y_1, - &S0y_2, - &S0y_3, - &S1y_0, - &S1y_1, - &S1y_2, - &S1y_3, - &iy_min, - i2(p), - dx2(p), - i2_prev(p), - dx2_prev(p)); - /* - z - direction + x - direction */ // shape function at previous timestep - real_t S0z_0, S0z_1, S0z_2, S0z_3; + real_t S0x_0, S0x_1, S0x_2, S0x_3; // shape function at current timestep - real_t S1z_0, S1z_1, S1z_2, S1z_3; + real_t S1x_0, S1x_1, S1x_2, S1x_3; // indices of the shape function - uint iz_min; + ncells_t ix_min; // find indices and define shape function - shape_function(&S0z_0, - &S0z_1, - &S0z_2, - &S0z_3, - &S1z_0, - &S1z_1, - &S1z_2, - &S1z_3, - &iz_min, - i3(p), - dx3(p), - i3_prev(p), - dx3_prev(p)); - - // Calculate weight function - for (int i = 0; i < interp_order + 2; ++i) { - for (int j = 0; j < interp_order + 2; ++j) { - for (int k = 0; k < interp_order + 2; ++k) { - // Esirkepov 2001, Eq. 31 - Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * - ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + - HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); - - Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * - (S0x[i] * S0z[k] + S1x[i] * S1z[k] + - HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); - - Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * - (S0x[i] * S0y[j] + S1x[i] * S1y[j] + - HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); - } - } - } - - // Unrolled calculations for Wx, Wy, and Wz - const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_0_1_1 = THIRD * 
(S1x_0 - S0x_0) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_0 + 
S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + 
S0y_0 * S1z_3)); - - const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_3_0_3 = 
THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const real_t Qdxdt = coeff * inv_dt * dxp_r_1; - - J_acc(ix_min, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_0_0_0; - J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; - J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; - J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; - // - 
J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; - J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_1_1; - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; - J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; - // - J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_0_2_0; - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; - J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; - // - J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_0_3_0; - J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; - J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; - J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; - // - // - J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; - J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_0_1; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; - J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_0_3; - // - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; - J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_1_1; - J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_1_2; - J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_1_3; - // - J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_1_2_0; - J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_2_1; - J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_2_2; - J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_2_3; - // - J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_1_3_0; - J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_3_1; - J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_3_2; - J_acc(ix_min + 1, iy_min + 3, 
iz_min + 3, cur::jx1) += Qdxdt * Wx_1_3_3; - // - // - J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; - J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_0_1; - J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_0_2; - J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_0_3; - // - J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_2_1_0; - J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_1_1; - J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_1_2; - J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_1_3; - // - J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_2_2_0; - J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_2_1; - J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_2_2; - J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_2_3; - // - J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_2_3_0; - J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_3_1; - J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_3_2; - J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_3_3; - // - // - J_acc(ix_min + 3, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; - J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_0_1; - J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_0_2; - J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_0_3; - // - J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_3_1_0; - J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_1_1; - J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_1_2; - J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_1_3; - // - J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_3_2_0; - J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_2_1; - J_acc(ix_min + 3, 
iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_2_2; - J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_2_3; - // - J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; - J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_3_1; - J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_3_2; - J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_3_3; + shape_function_2nd(S0x_0, + S0x_1, + S0x_2, + S0x_3, + S1x_0, + S1x_1, + S1x_2, + S1x_3, + ix_min, + i1(p), + dx1(p), + i1_prev(p), + dx1_prev(p)); - /* - y-component - */ - // i = 0 - const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - - const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - - const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - HALF * 
(S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - - const auto Wy_0_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - - const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_2_2 = THIRD * 
(S1y_2 - S0y_2) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_1_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * 
S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - - const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - - const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_2_1 = THIRD * (S1y_2 - 
S0y_2) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - - const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - - const real_t Qdydt = coeff * inv_dt * dxp_r_2; - - J_acc(ix_min, iy_min, iz_min, cur::jx2) += Qdydt * Wy_0_0_0; - J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_0_0_1; - J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_0_0_2; - J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_0_0_3; - // - J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_0_1_0; - J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_0_1_1; - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_0_1_2; - J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_0_1_3; - // - J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_0_2_0; - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_0_2_1; - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_0_2_2; - J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_0_2_3; - // - J_acc(ix_min, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_0_3_0; - J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_0_3_1; - J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_0_3_2; - J_acc(ix_min, iy_min + 3, iz_min + 3, 
cur::jx2) += Qdydt * Wy_0_3_3; - // - // - J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += Qdydt * Wy_1_0_0; - J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_1_0_1; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_1_0_2; - J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_1_0_3; - // - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_1_1_0; - J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_1_1_1; - J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_1_1_2; - J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_1_1_3; - // - J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_1_2_0; - J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_1_2_1; - J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_1_2_2; - J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_1_2_3; - // - J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_1_3_0; - J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_1_3_1; - J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_1_3_2; - J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_1_3_3; - // - // - J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += Qdydt * Wy_2_0_0; - J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_2_0_1; - J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_2_0_2; - J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_2_0_3; - // - J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_2_1_0; - J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_2_1_1; - J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_2_1_2; - J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_2_1_3; - // - J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_2_2_0; - J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_2_2_1; - J_acc(ix_min + 2, iy_min + 2, 
iz_min + 2, cur::jx2) += Qdydt * Wy_2_2_2; - J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_2_2_3; - // - J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_2_3_0; - J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_2_3_1; - J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_2_3_2; - J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_2_3_3; - // - // - J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += Qdydt * Wy_3_0_0; - J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_3_0_1; - J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_3_0_2; - J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_3_0_3; - // - J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_3_1_0; - J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_3_1_1; - J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_3_1_2; - J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_3_1_3; - // - J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_3_2_0; - J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_3_2_1; - J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_3_2_2; - J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_3_2_3; - // - J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_3_3_0; - J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_3_3_1; - J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_3_3_2; - J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_3_3_3; + if constexpr (D == Dim::_1D) { + // ToDo + } else if constexpr (D == Dim::_2D) { - /* - z - component - */ - const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - const auto Wz_0_0_2 
= THIRD * (S1z_2 - S0z_2) * - (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - const auto Wz_0_0_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - - const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - const auto Wz_0_1_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - - const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - const auto Wz_0_2_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - - const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - const auto Wz_0_3_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - - // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 - const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - const auto Wz_1_0_1 
= THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - const auto Wz_1_0_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - - const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - const auto Wz_1_1_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - - const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - const auto Wz_1_2_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - - const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - const auto Wz_1_3_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - - const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * 
(S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - // Unrolled loop for Wz[i][j][k] 
with i = 3 and interp_order + 2 = 4 - const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - const auto Wz_3_0_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - - const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - const auto Wz_3_1_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - - const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - const auto Wz_3_2_3 = THIRD * (S1z_3 - S0z_3) * - (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - - const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - const auto Wz_3_3_3 = THIRD * 
(S1z_3 - S0z_3) * - (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - - const real_t Qdzdt = coeff * inv_dt * dxp_r_3; - - J_acc(ix_min, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_0_0_0; - J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_0_1; - J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_0_2; - J_acc(ix_min, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_0_3; - // - J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_0_1_0; - J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_1_1; - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_1_2; - J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_1_3; - // - J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_0_2_0; - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_2_1; - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_2_2; - J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_2_3; - // - J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_0_3_0; - J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_3_1; - J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_3_2; - J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_3_3; - // - // - J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_1_0_0; - J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_0_1; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_0_2; - J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_0_3; - // - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_1_1_0; - J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_1_1; - J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_1_2; - J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_1_3; - // - J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_1_2_0; - J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * 
Wz_1_2_1; - J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_2_2; - J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_2_3; - // - J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_1_3_0; - J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_3_1; - J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_3_2; - J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_3_3; - // - // - J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_2_0_0; - J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_0_1; - J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_0_2; - J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_0_3; - // - J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_2_1_0; - J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_1_1; - J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_1_2; - J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_1_3; - // - J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_2_2_0; - J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_2_1; - J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_2_2; - J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_2_3; - // - J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_2_3_0; - J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_3_1; - J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_3_2; - J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_3_3; - // - // - J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_3_0_0; - J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_0_1; - J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_0_2; - J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_0_3; - // - J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) 
+= Qdzdt * Wz_3_1_0; - J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_1_1; - J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_1_2; - J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_1_3; - // - J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_3_2_0; - J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_2_1; - J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_2_2; - J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_2_3; - // - J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_3_3_0; - J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_3_1; - J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_3_2; - J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_3_3; - }; -#endif // SHAPE_FUNCTION_ORDER - } // namespace kernel + /* + y - direction + */ + + // shape function at previous timestep + real_t S0y_0, S0y_1, S0y_2, S0y_3; + // shape function at current timestep + real_t S1y_0, S1y_1, S1y_2, S1y_3; + // indices of the shape function + ncells_t iy_min; + // find indices and define shape function + shape_function_2nd(S0y_0, + S0y_1, + S0y_2, + S0y_3, + S1y_0, + S1y_1, + S1y_2, + S1y_3, + iy_min, + i2(p), + dx2(p), + i2_prev(p), + dx2_prev(p)); + + // Esirkepov 2001, Eq. 
39 + /* + x - component + */ + // Calculate weight function - unrolled + const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); + const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); + const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); + const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); + + const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); + const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); + const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); + const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); + + const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); + const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); + const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); + const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); + + const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); + const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); + const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); + const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); + + // Unrolled calculations for Wy + const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S0y_0 - S1y_0); + const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S0y_1 - S1y_1); + const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S0y_2 - S1y_2); + const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S0y_3 - S1y_3); + + const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S0y_0 - S1y_0); + const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S0y_1 - S1y_1); + const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S0y_2 - S1y_2); + const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S0y_3 - S1y_3); + + const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S0y_0 - S1y_0); + const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S0y_1 - S1y_1); + const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S0y_2 - S1y_2); + const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S0y_3 - S1y_3); + + const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S0y_0 - S1y_0); + const auto Wy_3_1 = 
HALF * (S1x_3 + S0x_3) * (S0y_1 - S1y_1); + const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); + const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S0y_3 - S1y_3); + + // Unrolled calculations for Wz + const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + + S0y_0 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + + S0y_1 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + + S0y_2 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + + S0y_3 * (HALF * S1x_0 + S0x_0)); + + const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + + S0y_0 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_1 = THIRD * (S1y_1 * (HALF * S0x_1 + S1x_1) + + S0y_1 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + + S0y_2 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + + S0y_3 * (HALF * S1x_1 + S0x_1)); + + const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + + S0y_0 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + + S0y_1 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + + S0y_2 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + + S0y_3 * (HALF * S1x_2 + S0x_2)); + + const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + + S0y_0 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_1 = THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + + S0y_1 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + + S0y_2 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + + S0y_3 * (HALF * S1x_3 + S0x_3)); + + const auto delta_x = static_cast(i1(p) == i1_prev(p)) * + static_cast(dx1(p) - dx1_prev(p)) + + static_cast(i1(p) == i1_prev(p) + 1) * + static_cast(dx1(p) + (1 - dx1_prev(p))) + + static_cast(i1(p) 
== i1_prev(p) - 1) * + static_cast((1 - dx1(p)) + dx1_prev(p)); + + const auto delta_y = static_cast(i2(p) == i2_prev(p)) * + static_cast(dx2(p) - dx2_prev(p)) + + static_cast(i2(p) == i2_prev(p) + 1) * + static_cast(dx2(p) + (1 - dx2_prev(p))) + + static_cast(i2(p) == i2_prev(p) - 1) * + static_cast((1 - dx2(p)) + dx2_prev(p)); + + const real_t Qdxdt = -coeff * inv_dt * delta_x; + const real_t Qdydt = -coeff * inv_dt * delta_y; + const real_t QVz = vp[2] * coeff; + + // @TODO + jx_local_0_0 = Qdxdt * Wx_0_0; + jx_local_1_0 = jx_local_0_0 - Qdxdt * Wx_1_0; + + auto J_acc = J.access(); + + J_acc(ix_min, iy_min, cur::jx1) += jx_local_0_0; + J_acc(ix_min + 1, iy_min, cur::jx1) += jx_local_1_0; + + // J_acc(ix_min, iy_min, cur::jx1) += Qdxdt * Wx_0_0; + // J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; + // J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; + // J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; + // + // J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; + // J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; + // J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; + // J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; + // + // J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; + // J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; + // J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; + // J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; + // + // J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; + // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; + // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; + // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; + + /* + y - component + */ + J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; + J_acc(ix_min, iy_min + 1, cur::jx2) += Qdydt * Wy_0_1; + J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; + J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; + + J_acc(ix_min + 1, iy_min, 
cur::jx2) += Qdydt * Wy_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; + + /* + z - component, simulated direction + */ + J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; + J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; + J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; + J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; + + } else if constexpr (D == Dim::_3D) { + // /* + // y - direction + // */ + // + // // shape function at previous timestep + // real_t S0y_0, S0y_1, S0y_2, S0y_3; + // // shape function at current timestep + // real_t S1y_0, S1y_1, S1y_2, S1y_3; + // // indices of the shape function + // uint iy_min; + // // find indices and define 
shape function + // shape_function_2nd(S0y_0, + // S0y_1, + // S0y_2, + // S0y_3, + // S1y_0, + // S1y_1, + // S1y_2, + // S1y_3, + // iy_min, + // i2(p), + // dx2(p), + // i2_prev(p), + // dx2_prev(p)); + // + // /* + // z - direction + // */ + // + // // shape function at previous timestep + // real_t S0z_0, S0z_1, S0z_2, S0z_3; + // // shape function at current timestep + // real_t S1z_0, S1z_1, S1z_2, S1z_3; + // // indices of the shape function + // uint iz_min; + // // find indices and define shape function + // shape_function_2nd(S0z_0, + // S0z_1, + // S0z_2, + // S0z_3, + // S1z_0, + // S1z_1, + // S1z_2, + // S1z_3, + // iz_min, + // i3(p), + // dx3(p), + // i3_prev(p), + // dx3_prev(p)); + // + // // Calculate weight function + // // for (int i = 0; i < interp_order + 2; ++i) { + // // for (int j = 0; j < interp_order + 2; ++j) { + // // for (int k = 0; k < interp_order + 2; ++k) { + // // // Esirkepov 2001, Eq. 31 + // // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * + // // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + + // // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); + // // + // // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * + // // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + + // // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); + // // + // // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * + // // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + + // // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + // // } + // // } + // // } + // // + // // Unrolled calculations for Wx, Wy, and Wz + // const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + // const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + // const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + // const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + 
+ // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + // + // const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + // const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + // const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + // const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + // + // const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + // const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + // const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + // const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + // + // const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + // const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + // const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + // const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + // + // const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + // const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + // HALF * (S0z_1 * S1y_0 + 
S0y_0 * S1z_1)); + // const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + // const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + // + // const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + // const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + // const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + // const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + // + // const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + // const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + // const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + // const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + // + // const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + // const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + // const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + // const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + // + // const auto 
Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + // const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + // const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + // const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + // + // const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + // const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + // const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + // const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + // + // const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + // const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + // const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + // const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + // + // const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + // const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + // const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * + 
// ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + // const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + // + // const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + // const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + // const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + // const auto Wx_3_0_3 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + // + // const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + // const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + // const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + // const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + // + // const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + // const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + // const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + // const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + // + // const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_3 * S0z_0 + S1y_3 * 
S1z_0) + + // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + // const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + // const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + // const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * + // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + // + // const real_t Qdxdt = coeff * inv_dt * dxp_r_1; + // + // J_acc(ix_min, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; + // J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; + // // + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_1_1; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; + // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; + // // + // J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_0_2_0; + // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; + // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; + // // + // J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_0_3_0; + // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; + // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; + // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; + // // + // // + // J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_0_1; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; + // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += 
Qdxdt * Wx_1_0_3; + // // + // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_1_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_1_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_1_3; + // // + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_2_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_2_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_2_3; + // // + // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_1_3_0; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_3_1; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_3_2; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_3_3; + // // + // // + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_0_1; + // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_0_2; + // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_0_3; + // // + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_2_1_0; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_1_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_1_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_1_3; + // // + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_2_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_2_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_2_3; + // // + // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_2_3_0; + // 
J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_3_1; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_3_2; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_3_3; + // // + // // + // J_acc(ix_min + 3, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; + // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_0_1; + // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_0_2; + // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_0_3; + // // + // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_3_1_0; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_1_1; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_1_2; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_1_3; + // // + // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_3_2_0; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_2_1; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_2_2; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_2_3; + // // + // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_3_1; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_3_2; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_3_3; + // + // /* + // y-component + // */ + // // i = 0 + // const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + // const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + // const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + // const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) 
* + // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + // + // const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + // const auto Wy_0_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + // const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + // const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + // + // const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + // const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + // const auto Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + // const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + // + // const auto Wy_0_3_0 = THIRD * (S1y_3 - S0y_3) * + // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + // const auto Wy_0_3_1 = THIRD * (S1y_3 - S0y_3) * + // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + // const auto Wy_0_3_2 = THIRD * (S1y_3 - S0y_3) * + // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + // const auto Wy_0_3_3 = THIRD * (S1y_3 - S0y_3) * + // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + // + // const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + // const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + // HALF * (S0z_1 * 
S1x_1 + S0x_1 * S1z_1)); + // const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + // const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + // + // const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + // const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + // const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + // const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + // + // const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + // const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + // const auto Wy_1_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + // const auto Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + // + // const auto Wy_1_3_0 = THIRD * (S1y_3 - S0y_3) * + // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + // const auto Wy_1_3_1 = THIRD * (S1y_3 - S0y_3) * + // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + // const auto Wy_1_3_2 = THIRD * (S1y_3 - S0y_3) * + // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + // const auto Wy_1_3_3 = THIRD * (S1y_3 - S0y_3) * + // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + // + // const auto Wy_2_0_0 = THIRD * 
(S1y_0 - S0y_0) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF 
* (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_3_1 
= THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const real_t Qdydt = coeff * inv_dt * dxp_r_2; + // + // J_acc(ix_min, iy_min, iz_min, cur::jx2) += Qdydt * Wy_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_0_0_1; + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_0_0_2; + // J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_0_0_3; + // // + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_0_1_1; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_0_1_2; + // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_0_1_3; + // // + // J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_0_2_0; + // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_0_2_1; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_0_2_2; + // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_0_2_3; + // // + // J_acc(ix_min, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_0_3_0; + // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_0_3_1; + // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_0_3_2; + // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_0_3_3; + // // + // // + // J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += Qdydt * Wy_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_1_0_1; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_1_0_2; + // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_1_0_3; + // // + // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += 
Qdydt * Wy_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_1_1_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_1_1_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_1_1_3; + // // + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_1_2_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_1_2_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_1_2_3; + // // + // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_1_3_0; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_1_3_1; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_1_3_2; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_1_3_3; + // // + // // + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += Qdydt * Wy_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_2_0_1; + // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_2_0_2; + // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_2_0_3; + // // + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_2_1_0; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_2_1_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_2_1_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_2_1_3; + // // + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_2_2_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_2_2_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_2_2_3; + // // + // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_2_3_0; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_2_3_1; + // 
J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_2_3_2; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_2_3_3; + // // + // // + // J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += Qdydt * Wy_3_0_0; + // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_3_0_1; + // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_3_0_2; + // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_3_0_3; + // // + // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_3_1_0; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_3_1_1; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_3_1_2; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_3_1_3; + // // + // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_3_2_0; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_3_2_1; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_3_2_2; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_3_2_3; + // // + // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_3_3_0; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_3_3_1; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_3_3_2; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_3_3_3; + // + // /* + // z - component + // */ + // const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const auto Wz_0_0_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // 
+ // const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // + // const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // + // const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // + // // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 + // const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * 
S1x_1)); + // const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // const auto Wz_1_0_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // + // const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // + // const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // const auto Wz_1_2_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // + // const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // + // const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * 
+ // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + 
S0x_2 * S1z_2)); + // const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 + // const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // + // const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // + // const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // + // const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * 
(S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // + // const real_t Qdzdt = coeff * inv_dt * dxp_r_3; + // + // J_acc(ix_min, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_0_1; + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_0_2; + // J_acc(ix_min, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_0_3; + // // + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_1_1; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_1_2; + // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_1_3; + // // + // J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_0_2_0; + // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_2_1; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_2_2; + // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_2_3; + // // + // J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_0_3_0; + // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_3_1; + // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_3_2; + // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_3_3; + // // + // // + // J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_0_1; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_0_2; + // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_0_3; + // // 
+ // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_1_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_1_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_1_3; + // // + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_2_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_2_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_2_3; + // // + // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_1_3_0; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_3_1; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_3_2; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_3_3; + // // + // // + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_0_1; + // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_0_2; + // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_0_3; + // // + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_2_1_0; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_1_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_1_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_1_3; + // // + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_2_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_2_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_2_3; + // // + // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_2_3_0; + // J_acc(ix_min + 2, iy_min + 3, 
iz_min + 1, cur::jx3) += Qdzdt * Wz_2_3_1; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_3_2; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_3_3; + // // + // // + // J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_3_0_0; + // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_0_1; + // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_0_2; + // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_0_3; + // // + // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_3_1_0; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_1_1; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_1_2; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_1_3; + // // + // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_3_2_0; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_2_1; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_2_2; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_2_3; + // // + // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_3_3_0; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_3_1; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_3_2; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_3_3; + } // dimension + } else { // order + raise::KernelError(HERE, "Unsupported interpolation order"); + } + } + }; +} // namespace kernel #undef i_di_to_Xi diff --git a/src/kernels/faraday_mink.hpp b/src/kernels/faraday_mink.hpp index 1112e56e7..d92057c77 100644 --- a/src/kernels/faraday_mink.hpp +++ b/src/kernels/faraday_mink.hpp @@ -14,6 +14,7 @@ #include "arch/kokkos_aliases.h" #include "utils/error.h" +#include "utils/numeric.h" namespace kernel::mink { using namespace ntt; @@ -42,13 +43,21 @@ namespace kernel::mink { * ! 
2D: coeff1 = dt / dx^2, coeff2 = dt * ! 3D: coeff1 = dt / dx */ - Faraday_kernel(const ndfield_t& EB, real_t coeff1, real_t coeff2 - , real_t deltax, real_t deltay, real_t betaxy, real_t betayx - , real_t deltaz, real_t betaxz, real_t betazx, real_t betayz - , real_t betazy) + Faraday_kernel(const ndfield_t& EB, + real_t coeff1, + real_t coeff2, + real_t deltax = ZERO, + real_t deltay = ZERO, + real_t betaxy = ZERO, + real_t betayx = ZERO, + real_t deltaz = ZERO, + real_t betaxz = ZERO, + real_t betazx = ZERO, + real_t betayz = ZERO, + real_t betazy = ZERO) : EB { EB } , coeff1 { coeff1 } - , coeff2 { coeff2 } + , coeff2 { coeff2 } , deltax { deltax } , deltay { deltay } , betaxy { betaxy } @@ -59,17 +68,15 @@ namespace kernel::mink { , betayz { betayz } , betazy { betazy } {} - - Inline void operator()(index_t i1) const { if constexpr (D == Dim::_1D) { - const auto alphax = ONE - THREE * deltax; - EB(i1, em::bx2) += coeff1 * ( - + alphax * (EB(i1 + 1, em::ex3) - EB(i1 , em::ex3)) - + deltax * (EB(i1 + 2, em::ex3) - EB(i1 - 1, em::ex3))); - EB(i1, em::bx3) += coeff1 * ( - - alphax * (EB(i1 + 1, em::ex2) - EB(i1 , em::ex2)) - - deltax * (EB(i1 + 2, em::ex2) - EB(i1 - 1, em::ex2))); + const auto alphax = ONE - THREE * deltax; + EB(i1, em::bx2) += coeff1 * + (+alphax * (EB(i1 + 1, em::ex3) - EB(i1, em::ex3)) + + deltax * (EB(i1 + 2, em::ex3) - EB(i1 - 1, em::ex3))); + EB(i1, em::bx3) += coeff1 * + (-alphax * (EB(i1 + 1, em::ex2) - EB(i1, em::ex2)) - + deltax * (EB(i1 + 2, em::ex2) - EB(i1 - 1, em::ex2))); } else { raise::KernelError(HERE, "Faraday_kernel: 1D implementation called for D != 1"); } @@ -79,25 +86,28 @@ namespace kernel::mink { if constexpr (D == Dim::_2D) { const auto alphax = ONE - TWO * betaxy - THREE * deltax; const auto alphay = ONE - TWO * betayx - THREE * deltay; - EB(i1, i2, em::bx1) += coeff1 * ( - - alphay * (EB(i1 , i2 + 1, em::ex3) - EB(i1 , i2 , em::ex3)) - - deltay * (EB(i1 , i2 + 2, em::ex3) - EB(i1 , i2 - 1, em::ex3)) - - betayx * (EB(i1 + 
1, i2 + 1, em::ex3) - EB(i1 + 1, i2 , em::ex3)) - - betayx * (EB(i1 - 1, i2 + 1, em::ex3) - EB(i1 - 1, i2 , em::ex3))); - EB(i1, i2, em::bx2) += coeff1 * ( - + alphax * (EB(i1 + 1, i2 , em::ex3) - EB(i1 , i2 , em::ex3)) - + deltax * (EB(i1 + 2, i2 , em::ex3) - EB(i1 - 1, i2 , em::ex3)) - + betaxy * (EB(i1 + 1, i2 + 1, em::ex3) - EB(i1 , i2 + 1, em::ex3)) - + betaxy * (EB(i1 + 1, i2 - 1, em::ex3) - EB(i1 , i2 - 1, em::ex3))); - EB(i1, i2, em::bx3) += coeff2 * ( - + alphay * (EB(i1 , i2 + 1, em::ex1) - EB(i1 , i2 , em::ex1)) - + deltay * (EB(i1 , i2 + 2, em::ex1) - EB(i1 , i2 - 1, em::ex1)) - + betayx * (EB(i1 + 1, i2 + 1, em::ex1) - EB(i1 + 1, i2 , em::ex1)) - + betayx * (EB(i1 - 1, i2 + 1, em::ex1) - EB(i1 - 1, i2 , em::ex1)) - - alphax * (EB(i1 + 1, i2 , em::ex2) - EB(i1 , i2 , em::ex2)) - - deltax * (EB(i1 + 2, i2 , em::ex2) - EB(i1 - 1, i2 , em::ex2)) - - betaxy * (EB(i1 + 1, i2 + 1, em::ex2) - EB(i1 , i2 + 1, em::ex2)) - - betaxy * (EB(i1 + 1, i2 - 1, em::ex2) - EB(i1 , i2 - 1, em::ex2))); + EB(i1, i2, em::bx1) += + coeff1 * + (-alphay * (EB(i1, i2 + 1, em::ex3) - EB(i1, i2, em::ex3)) - + deltay * (EB(i1, i2 + 2, em::ex3) - EB(i1, i2 - 1, em::ex3)) - + betayx * (EB(i1 + 1, i2 + 1, em::ex3) - EB(i1 + 1, i2, em::ex3)) - + betayx * (EB(i1 - 1, i2 + 1, em::ex3) - EB(i1 - 1, i2, em::ex3))); + EB(i1, i2, em::bx2) += + coeff1 * + (+alphax * (EB(i1 + 1, i2, em::ex3) - EB(i1, i2, em::ex3)) + + deltax * (EB(i1 + 2, i2, em::ex3) - EB(i1 - 1, i2, em::ex3)) + + betaxy * (EB(i1 + 1, i2 + 1, em::ex3) - EB(i1, i2 + 1, em::ex3)) + + betaxy * (EB(i1 + 1, i2 - 1, em::ex3) - EB(i1, i2 - 1, em::ex3))); + EB(i1, i2, em::bx3) += + coeff2 * + (+alphay * (EB(i1, i2 + 1, em::ex1) - EB(i1, i2, em::ex1)) + + deltay * (EB(i1, i2 + 2, em::ex1) - EB(i1, i2 - 1, em::ex1)) + + betayx * (EB(i1 + 1, i2 + 1, em::ex1) - EB(i1 + 1, i2, em::ex1)) + + betayx * (EB(i1 - 1, i2 + 1, em::ex1) - EB(i1 - 1, i2, em::ex1)) - + alphax * (EB(i1 + 1, i2, em::ex2) - EB(i1, i2, em::ex2)) - + deltax * (EB(i1 + 2, 
i2, em::ex2) - EB(i1 - 1, i2, em::ex2)) - + betaxy * (EB(i1 + 1, i2 + 1, em::ex2) - EB(i1, i2 + 1, em::ex2)) - + betaxy * (EB(i1 + 1, i2 - 1, em::ex2) - EB(i1, i2 - 1, em::ex2))); } else { raise::KernelError(HERE, "Faraday_kernel: 2D implementation called for D != 2"); @@ -109,45 +119,51 @@ namespace kernel::mink { const auto alphax = ONE - TWO * betaxy - TWO * betaxz - THREE * deltax; const auto alphay = ONE - TWO * betayx - TWO * betayz - THREE * deltay; const auto alphaz = ONE - TWO * betazx - TWO * betazy - THREE * deltaz; - EB(i1, i2, i3, em::bx1) += coeff1 * ( - + alphaz * (EB(i1 , i2 , i3 + 1, em::ex2) - EB(i1 , i2 , i3 , em::ex2)) - + deltaz * (EB(i1 , i2 , i3 + 2, em::ex2) - EB(i1 , i2 , i3 - 1, em::ex2)) - + betazx * (EB(i1 + 1, i2 , i3 + 1, em::ex2) - EB(i1 + 1, i2 , i3 , em::ex2)) - + betazx * (EB(i1 - 1, i2 , i3 + 1, em::ex2) - EB(i1 - 1, i2 , i3 , em::ex2)) - + betazy * (EB(i1 , i2 + 1, i3 + 1, em::ex2) - EB(i1 , i2 + 1, i3 , em::ex2)) - + betazy * (EB(i1 , i2 - 1, i3 + 1, em::ex2) - EB(i1 , i2 - 1, i3 , em::ex2)) - - alphay * (EB(i1 , i2 + 1, i3 , em::ex3) - EB(i1 , i2 , i3 , em::ex3)) - - deltay * (EB(i1 , i2 + 2, i3 , em::ex3) - EB(i1 , i2 - 1, i3 , em::ex3)) - - betayx * (EB(i1 + 1, i2 + 1, i3 , em::ex3) - EB(i1 + 1, i2 , i3 , em::ex3)) - - betayx * (EB(i1 - 1, i2 + 1, i3 , em::ex3) - EB(i1 - 1, i2 , i3 , em::ex3)) - - betayz * (EB(i1 , i2 + 1, i3 + 1, em::ex3) - EB(i1 , i2 , i3 + 1, em::ex3)) - - betayz * (EB(i1 , i2 + 1, i3 - 1, em::ex3) - EB(i1 , i2 , i3 - 1, em::ex3))); - EB(i1, i2, i3, em::bx2) += coeff1 * ( - + alphax * (EB(i1 + 1, i2 , i3 , em::ex3) - EB(i1 , i2 , i3 , em::ex3)) - + deltax * (EB(i1 + 2, i2 , i3 , em::ex3) - EB(i1 - 1, i2 , i3 , em::ex3)) - + betaxy * (EB(i1 + 1, i2 + 1, i3 , em::ex3) - EB(i1 , i2 + 1, i3 , em::ex3)) - + betaxy * (EB(i1 + 1, i2 - 1, i3 , em::ex3) - EB(i1 , i2 - 1, i3 , em::ex3)) - + betaxz * (EB(i1 + 1, i2 , i3 + 1, em::ex3) - EB(i1 , i2 , i3 + 1, em::ex3)) - + betaxz * (EB(i1 + 1, i2 , i3 - 1, em::ex3) - 
EB(i1 , i2 , i3 - 1, em::ex3)) - - alphaz * (EB(i1 , i2 , i3 + 1, em::ex1) - EB(i1 , i2 , i3 , em::ex1)) - - deltaz * (EB(i1 , i2 , i3 + 2, em::ex1) - EB(i1 , i2 , i3 - 1, em::ex1)) - - betazx * (EB(i1 + 1, i2 , i3 + 1, em::ex1) - EB(i1 + 1, i2 , i3 , em::ex1)) - - betazx * (EB(i1 - 1, i2 , i3 + 1, em::ex1) - EB(i1 - 1, i2 , i3 , em::ex1)) - - betazy * (EB(i1 , i2 + 1, i3 + 1, em::ex1) - EB(i1 , i2 + 1, i3 , em::ex1)) - - betazy * (EB(i1 , i2 - 1, i3 + 1, em::ex1) - EB(i1 , i2 - 1, i3 , em::ex1))); - EB(i1, i2, i3, em::bx3) += coeff1 * ( - + alphay * (EB(i1 , i2 + 1, i3 , em::ex1) - EB(i1 , i2 , i3 , em::ex1)) - + deltay * (EB(i1 , i2 + 2, i3 , em::ex1) - EB(i1 , i2 - 1, i3 , em::ex1)) - + betayx * (EB(i1 + 1, i2 + 1, i3 , em::ex1) - EB(i1 + 1, i2 , i3 , em::ex1)) - + betayx * (EB(i1 - 1, i2 + 1, i3 , em::ex1) - EB(i1 - 1, i2 , i3 , em::ex1)) - + betayz * (EB(i1 , i2 + 1, i3 + 1, em::ex1) - EB(i1 , i2 , i3 + 1, em::ex1)) - + betayz * (EB(i1 , i2 + 1, i3 - 1, em::ex1) - EB(i1 , i2 , i3 - 1, em::ex1)) - - alphax * (EB(i1 + 1, i2 , i3 , em::ex2) - EB(i1 , i2 , i3 , em::ex2)) - - deltax * (EB(i1 + 2, i2 , i3 , em::ex2) - EB(i1 - 1, i2 , i3 , em::ex2)) - - betaxy * (EB(i1 + 1, i2 + 1, i3 , em::ex2) - EB(i1 , i2 + 1, i3 , em::ex2)) - - betaxy * (EB(i1 + 1, i2 - 1, i3 , em::ex2) - EB(i1 , i2 - 1, i3 , em::ex2)) - - betaxz * (EB(i1 + 1, i2 , i3 + 1, em::ex2) - EB(i1 , i2 , i3 + 1, em::ex2)) - - betaxz * (EB(i1 + 1, i2 , i3 - 1, em::ex2) - EB(i1 , i2 , i3 - 1, em::ex2))); + EB(i1, i2, i3, em::bx1) += + coeff1 * + (+alphaz * (EB(i1, i2, i3 + 1, em::ex2) - EB(i1, i2, i3, em::ex2)) + + deltaz * (EB(i1, i2, i3 + 2, em::ex2) - EB(i1, i2, i3 - 1, em::ex2)) + + betazx * (EB(i1 + 1, i2, i3 + 1, em::ex2) - EB(i1 + 1, i2, i3, em::ex2)) + + betazx * (EB(i1 - 1, i2, i3 + 1, em::ex2) - EB(i1 - 1, i2, i3, em::ex2)) + + betazy * (EB(i1, i2 + 1, i3 + 1, em::ex2) - EB(i1, i2 + 1, i3, em::ex2)) + + betazy * (EB(i1, i2 - 1, i3 + 1, em::ex2) - EB(i1, i2 - 1, i3, em::ex2)) - + alphay * (EB(i1, 
i2 + 1, i3, em::ex3) - EB(i1, i2, i3, em::ex3)) - + deltay * (EB(i1, i2 + 2, i3, em::ex3) - EB(i1, i2 - 1, i3, em::ex3)) - + betayx * (EB(i1 + 1, i2 + 1, i3, em::ex3) - EB(i1 + 1, i2, i3, em::ex3)) - + betayx * (EB(i1 - 1, i2 + 1, i3, em::ex3) - EB(i1 - 1, i2, i3, em::ex3)) - + betayz * (EB(i1, i2 + 1, i3 + 1, em::ex3) - EB(i1, i2, i3 + 1, em::ex3)) - + betayz * + (EB(i1, i2 + 1, i3 - 1, em::ex3) - EB(i1, i2, i3 - 1, em::ex3))); + EB(i1, i2, i3, em::bx2) += + coeff1 * + (+alphax * (EB(i1 + 1, i2, i3, em::ex3) - EB(i1, i2, i3, em::ex3)) + + deltax * (EB(i1 + 2, i2, i3, em::ex3) - EB(i1 - 1, i2, i3, em::ex3)) + + betaxy * (EB(i1 + 1, i2 + 1, i3, em::ex3) - EB(i1, i2 + 1, i3, em::ex3)) + + betaxy * (EB(i1 + 1, i2 - 1, i3, em::ex3) - EB(i1, i2 - 1, i3, em::ex3)) + + betaxz * (EB(i1 + 1, i2, i3 + 1, em::ex3) - EB(i1, i2, i3 + 1, em::ex3)) + + betaxz * (EB(i1 + 1, i2, i3 - 1, em::ex3) - EB(i1, i2, i3 - 1, em::ex3)) - + alphaz * (EB(i1, i2, i3 + 1, em::ex1) - EB(i1, i2, i3, em::ex1)) - + deltaz * (EB(i1, i2, i3 + 2, em::ex1) - EB(i1, i2, i3 - 1, em::ex1)) - + betazx * (EB(i1 + 1, i2, i3 + 1, em::ex1) - EB(i1 + 1, i2, i3, em::ex1)) - + betazx * (EB(i1 - 1, i2, i3 + 1, em::ex1) - EB(i1 - 1, i2, i3, em::ex1)) - + betazy * (EB(i1, i2 + 1, i3 + 1, em::ex1) - EB(i1, i2 + 1, i3, em::ex1)) - + betazy * + (EB(i1, i2 - 1, i3 + 1, em::ex1) - EB(i1, i2 - 1, i3, em::ex1))); + EB(i1, i2, i3, em::bx3) += + coeff1 * + (+alphay * (EB(i1, i2 + 1, i3, em::ex1) - EB(i1, i2, i3, em::ex1)) + + deltay * (EB(i1, i2 + 2, i3, em::ex1) - EB(i1, i2 - 1, i3, em::ex1)) + + betayx * (EB(i1 + 1, i2 + 1, i3, em::ex1) - EB(i1 + 1, i2, i3, em::ex1)) + + betayx * (EB(i1 - 1, i2 + 1, i3, em::ex1) - EB(i1 - 1, i2, i3, em::ex1)) + + betayz * (EB(i1, i2 + 1, i3 + 1, em::ex1) - EB(i1, i2, i3 + 1, em::ex1)) + + betayz * (EB(i1, i2 + 1, i3 - 1, em::ex1) - EB(i1, i2, i3 - 1, em::ex1)) - + alphax * (EB(i1 + 1, i2, i3, em::ex2) - EB(i1, i2, i3, em::ex2)) - + deltax * (EB(i1 + 2, i2, i3, em::ex2) - EB(i1 - 1, i2, i3, 
em::ex2)) - + betaxy * (EB(i1 + 1, i2 + 1, i3, em::ex2) - EB(i1, i2 + 1, i3, em::ex2)) - + betaxy * (EB(i1 + 1, i2 - 1, i3, em::ex2) - EB(i1, i2 - 1, i3, em::ex2)) - + betaxz * (EB(i1 + 1, i2, i3 + 1, em::ex2) - EB(i1, i2, i3 + 1, em::ex2)) - + betaxz * + (EB(i1 + 1, i2, i3 - 1, em::ex2) - EB(i1, i2, i3 - 1, em::ex2))); } else { raise::KernelError(HERE, "Faraday_kernel: 3D implementation called for D != 3"); } diff --git a/src/kernels/tests/deposit.cpp b/src/kernels/tests/deposit.cpp index e6967eb14..d64e4bb2f 100644 --- a/src/kernels/tests/deposit.cpp +++ b/src/kernels/tests/deposit.cpp @@ -124,7 +124,7 @@ void testDeposit(const std::vector& res, // clang-format off Kokkos::parallel_for("CurrentsDeposit", 10, - kernel::DepositCurrents_kernel(J_scat, + kernel::DepositCurrents_kernel(J_scat, i1, i2, i3, i1_prev, i2_prev, i3_prev, dx1, dx2, dx3, @@ -136,31 +136,49 @@ void testDeposit(const std::vector& res, Kokkos::Experimental::contribute(J, J_scat); - real_t SumDivJ { 0.0 }; + const auto range = Kokkos::MDRangePolicy>( + { N_GHOSTS, N_GHOSTS }, + { nx1 + N_GHOSTS, nx2 + N_GHOSTS }); + + real_t SumDivJ = ZERO, SumJx = ZERO, SumJy = ZERO; Kokkos::parallel_reduce( "SumDivJ", - Kokkos::MDRangePolicy>({ N_GHOSTS, N_GHOSTS }, - { nx1 + N_GHOSTS, nx2 + N_GHOSTS }), + range, Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx1) - J(i - 1, j, cur::jx1) + J(i, j, cur::jx2) - J(i, j - 1, cur::jx2); }, SumDivJ); + Kokkos::parallel_reduce( + "SumJx", + range, + Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx1); }, + SumJx); + + Kokkos::parallel_reduce( + "SumJy", + range, + Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx2); }, + SumJy); + auto J_h = Kokkos::create_mirror_view(J); Kokkos::deep_copy(J_h, J); if (not cmp::AlmostZero(SumDivJ)) { throw std::logic_error("DepositCurrents_kernel::SumDivJ != 0"); } - errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + N_GHOSTS, cur::jx1), Jx1, "", acc), - 
"DepositCurrents_kernel::Jx1 is incorrect"); - errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + 1 + N_GHOSTS, cur::jx1), Jx2, "", acc), - "DepositCurrents_kernel::Jx2 is incorrect"); - errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + N_GHOSTS, cur::jx2), Jy1, "", acc), - "DepositCurrents_kernel::Jy1 is incorrect"); - errorIf(not equal(J_h(i0 + 1 + N_GHOSTS, j0 + N_GHOSTS, cur::jx2), Jy2, "", acc), - "DepositCurrents_kernel::Jy2 is incorrect"); + + std::cout << "SumJx: " << SumJx << " expected " << Jx1 + Jx2 << std::endl; + std::cout << "SumJy: " << SumJy << " expected " << Jy1 + Jy2 << std::endl; + // errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + N_GHOSTS, cur::jx1), Jx1, "", acc), + // "DepositCurrents_kernel::Jx1 is incorrect"); + // errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + 1 + N_GHOSTS, cur::jx1), Jx2, "", acc), + // "DepositCurrents_kernel::Jx2 is incorrect"); + // errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + N_GHOSTS, cur::jx2), Jy1, "", acc), + // "DepositCurrents_kernel::Jy1 is incorrect"); + // errorIf(not equal(J_h(i0 + 1 + N_GHOSTS, j0 + N_GHOSTS, cur::jx2), Jy2, "", acc), + // "DepositCurrents_kernel::Jy2 is incorrect"); } auto main(int argc, char* argv[]) -> int { diff --git a/src/kernels/tests/faraday_mink.cpp b/src/kernels/tests/faraday_mink.cpp index 74c2b9b1a..7394d9c01 100644 --- a/src/kernels/tests/faraday_mink.cpp +++ b/src/kernels/tests/faraday_mink.cpp @@ -4,6 +4,7 @@ #include "global.h" #include "arch/kokkos_aliases.h" +#include "utils/numeric.h" #include "metrics/minkowski.h" @@ -108,7 +109,7 @@ void testFaraday(const std::vector& res) { const real_t sx = constant::TWO_PI, sy = 4.0 * constant::PI; const auto metric = Minkowski { res, - {{ ZERO, sx }, { ZERO, sy }} + { { ZERO, sx }, { ZERO, sy } } }; auto emfield = ndfield_t { "emfield", res[0] + 2 * N_GHOSTS, @@ -116,7 +117,7 @@ void testFaraday(const std::vector& res) { const std::size_t i1min = N_GHOSTS, i1max = res[0] + N_GHOSTS; const std::size_t i2min = N_GHOSTS, i2max = res[1] + N_GHOSTS; const auto range 
= CreateRangePolicy({ i1min, i2min }, - { i1max, i2max }); + { i1max, i2max }); const auto range_ext = CreateRangePolicy( { 0, 0 }, { res[0] + 2 * N_GHOSTS, res[1] + 2 * N_GHOSTS }); @@ -212,7 +213,7 @@ void testFaraday(const std::vector& res) { sz = constant::TWO_PI; const auto metric = Minkowski { res, - {{ ZERO, sx }, { ZERO, sy }, { ZERO, sz }} + { { ZERO, sx }, { ZERO, sy }, { ZERO, sz } } }; auto emfield = ndfield_t { "emfield", res[0] + 2 * N_GHOSTS, @@ -222,7 +223,7 @@ void testFaraday(const std::vector& res) { const std::size_t i2min = N_GHOSTS, i2max = res[1] + N_GHOSTS; const std::size_t i3min = N_GHOSTS, i3max = res[2] + N_GHOSTS; const auto range = CreateRangePolicy({ i1min, i2min, i3min }, - { i1max, i2max, i3max }); + { i1max, i2max, i3max }); const auto range_ext = CreateRangePolicy( { 0, 0, 0 }, { res[0] + 2 * N_GHOSTS, res[1] + 2 * N_GHOSTS, res[2] + 2 * N_GHOSTS }); From 1e437bab05c042dfab0082eec0e15fb18e0d2c46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 15 May 2025 16:42:02 -0500 Subject: [PATCH 018/154] added missing recursive J update --- src/kernels/currents_deposit.hpp | 601 +++++-------------------------- 1 file changed, 88 insertions(+), 513 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 5ef52bba4..761ae8ab7 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -654,59 +654,101 @@ namespace kernel { const real_t Qdxdt = -coeff * inv_dt * delta_x; const real_t Qdydt = -coeff * inv_dt * delta_y; - const real_t QVz = vp[2] * coeff; - - // @TODO - jx_local_0_0 = Qdxdt * Wx_0_0; - jx_local_1_0 = jx_local_0_0 - Qdxdt * Wx_1_0; + const real_t QVz = -coeff * vp[2]; + + // Esirkepov - Eq. 
32 + // x-component + const auto jx_local_0_0 = -Qdxdt * Wx_0_0; + const auto jx_local_1_0 = jx_local_0_0 - Qdxdt * Wx_1_0; + const auto jx_local_2_0 = jx_local_1_0 - Qdxdt * Wx_2_0; + const auto jx_local_3_0 = jx_local_2_0 - Qdxdt * Wx_3_0; + + const auto jx_local_0_1 = -Qdxdt * Wx_0_1; + const auto jx_local_1_1 = jx_local_0_1 - Qdxdt * Wx_1_1; + const auto jx_local_2_1 = jx_local_1_1 - Qdxdt * Wx_2_1; + const auto jx_local_3_1 = jx_local_2_1 - Qdxdt * Wx_3_1; + + const auto jx_local_0_2 = -Qdxdt * Wx_0_2; + const auto jx_local_1_2 = jx_local_0_2 - Qdxdt * Wx_1_2; + const auto jx_local_2_2 = jx_local_1_2 - Qdxdt * Wx_2_2; + const auto jx_local_3_2 = jx_local_2_2 - Qdxdt * Wx_3_2; + + const auto jx_local_0_3 = -Qdxdt * Wx_0_3; + const auto jx_local_1_3 = jx_local_0_3 - Qdxdt * Wx_1_3; + const auto jx_local_2_3 = jx_local_1_3 - Qdxdt * Wx_2_3; + const auto jx_local_3_3 = jx_local_2_3 - Qdxdt * Wx_3_3; + + // y-component + const auto jy_local_0_0 = -Qdydt * Wy_0_0; + const auto jy_local_1_0 = jy_local_0_0 - Qdydt * Wy_1_0; + const auto jy_local_2_0 = jy_local_1_0 - Qdydt * Wy_2_0; + const auto jy_local_3_0 = jy_local_2_0 - Qdydt * Wy_3_0; + + const auto jy_local_0_1 = -Qdydt * Wy_0_1; + const auto jy_local_1_1 = jy_local_0_1 - Qdydt * Wy_1_1; + const auto jy_local_2_1 = jy_local_1_1 - Qdydt * Wy_2_1; + const auto jy_local_3_1 = jy_local_2_1 - Qdydt * Wy_3_1; + + const auto jy_local_0_2 = -Qdydt * Wy_0_2; + const auto jy_local_1_2 = jy_local_0_2 - Qdydt * Wy_1_2; + const auto jy_local_2_2 = jy_local_1_2 - Qdydt * Wy_2_2; + const auto jy_local_3_2 = jy_local_2_2 - Qdydt * Wy_3_2; + + const auto jy_local_0_3 = -Qdydt * Wy_0_3; + const auto jy_local_1_3 = jy_local_0_3 - Qdydt * Wy_1_3; + const auto jy_local_2_3 = jy_local_1_3 - Qdydt * Wy_2_3; + const auto jy_local_3_3 = jy_local_2_3 - Qdydt * Wy_3_3; + /* + Current update + */ auto J_acc = J.access(); + /* + x - component + */ J_acc(ix_min, iy_min, cur::jx1) += jx_local_0_0; - J_acc(ix_min + 1, iy_min, cur::jx1) += 
jx_local_1_0; - - // J_acc(ix_min, iy_min, cur::jx1) += Qdxdt * Wx_0_0; - // J_acc(ix_min, iy_min + 1, cur::jx1) += Qdxdt * Wx_0_1; - // J_acc(ix_min, iy_min + 2, cur::jx1) += Qdxdt * Wx_0_2; - // J_acc(ix_min, iy_min + 3, cur::jx1) += Qdxdt * Wx_0_3; - // - // J_acc(ix_min + 1, iy_min, cur::jx1) += Qdxdt * Wx_1_0; - // J_acc(ix_min + 1, iy_min + 1, cur::jx1) += Qdxdt * Wx_1_1; - // J_acc(ix_min + 1, iy_min + 2, cur::jx1) += Qdxdt * Wx_1_2; - // J_acc(ix_min + 1, iy_min + 3, cur::jx1) += Qdxdt * Wx_1_3; - // - // J_acc(ix_min + 2, iy_min, cur::jx1) += Qdxdt * Wx_2_0; - // J_acc(ix_min + 2, iy_min + 1, cur::jx1) += Qdxdt * Wx_2_1; - // J_acc(ix_min + 2, iy_min + 2, cur::jx1) += Qdxdt * Wx_2_2; - // J_acc(ix_min + 2, iy_min + 3, cur::jx1) += Qdxdt * Wx_2_3; - // - // J_acc(ix_min + 3, iy_min, cur::jx1) += Qdxdt * Wx_3_0; - // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += Qdxdt * Wx_3_1; - // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += Qdxdt * Wx_3_2; - // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += Qdxdt * Wx_3_3; + J_acc(ix_min, iy_min + 1, cur::jx1) += jx_local_0_1; + J_acc(ix_min, iy_min + 2, cur::jx1) += jx_local_0_2; + J_acc(ix_min, iy_min + 3, cur::jx1) += jx_local_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx1) += jx_local_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx1) += jx_local_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx1) += jx_local_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx1) += jx_local_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx1) += jx_local_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx1) += jx_local_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += jx_local_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx1) += jx_local_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx1) += jx_local_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx1) += jx_local_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx1) += jx_local_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx1) += jx_local_3_3; /* y - component */ - J_acc(ix_min, iy_min, cur::jx2) += Qdydt * Wy_0_0; - J_acc(ix_min, iy_min + 1, 
cur::jx2) += Qdydt * Wy_0_1; - J_acc(ix_min, iy_min + 2, cur::jx2) += Qdydt * Wy_0_2; - J_acc(ix_min, iy_min + 3, cur::jx2) += Qdydt * Wy_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx2) += Qdydt * Wy_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx2) += Qdydt * Wy_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += Qdydt * Wy_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx2) += Qdydt * Wy_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx2) += Qdydt * Wy_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx2) += Qdydt * Wy_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += Qdydt * Wy_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx2) += Qdydt * Wy_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx2) += Qdydt * Wy_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += Qdydt * Wy_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += Qdydt * Wy_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx2) += Qdydt * Wy_3_3; + J_acc(ix_min, iy_min, cur::jx2) += jy_local_0_0; + J_acc(ix_min, iy_min + 1, cur::jx2) += jy_local_0_1; + J_acc(ix_min, iy_min + 2, cur::jx2) += jy_local_0_2; + J_acc(ix_min, iy_min + 3, cur::jx2) += jy_local_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx2) += jy_local_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx2) += jy_local_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += jy_local_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx2) += jy_local_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx2) += jy_local_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx2) += jy_local_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += jy_local_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx2) += jy_local_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx2) += jy_local_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += jy_local_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += jy_local_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx2) += jy_local_3_3; /* z - component, simulated direction @@ -1233,471 +1275,4 @@ namespace kernel { // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); 
- // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * 
S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const real_t Qdydt = coeff * inv_dt * dxp_r_2; - // - // J_acc(ix_min, iy_min, iz_min, cur::jx2) += Qdydt * Wy_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_0_0_1; - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_0_0_2; - // J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_0_0_3; - // // - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_0_1_1; - // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_0_1_2; - // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_0_1_3; - // // - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_0_2_1; - // J_acc(ix_min, iy_min + 2, 
iz_min + 2, cur::jx2) += Qdydt * Wy_0_2_2; - // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_0_2_3; - // // - // J_acc(ix_min, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_0_3_0; - // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_0_3_1; - // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_0_3_2; - // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_0_3_3; - // // - // // - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += Qdydt * Wy_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_1_0_1; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_1_0_2; - // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_1_0_3; - // // - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_1_1_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_1_1_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_1_1_3; - // // - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_1_2_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_1_2_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_1_2_3; - // // - // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_1_3_0; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_1_3_1; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_1_3_2; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_1_3_3; - // // - // // - // J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += Qdydt * Wy_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_2_0_1; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_2_0_2; - // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_2_0_3; - // // - // 
J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_2_1_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_2_1_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_2_1_3; - // // - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_2_2_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_2_2_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_2_2_3; - // // - // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_2_3_0; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_2_3_1; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_2_3_2; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_2_3_3; - // // - // // - // J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += Qdydt * Wy_3_0_0; - // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_3_0_1; - // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_3_0_2; - // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_3_0_3; - // // - // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_3_1_0; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_3_1_1; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_3_1_2; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_3_1_3; - // // - // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_3_2_0; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_3_2_1; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_3_2_2; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_3_2_3; - // // - // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_3_3_0; - // J_acc(ix_min + 3, iy_min + 3, iz_min 
+ 1, cur::jx2) += Qdydt * Wy_3_3_1; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_3_3_2; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_3_3_3; - // - // /* - // z - component - // */ - // const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // - // const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // - // const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // - // const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * 
(S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // - // // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 - // const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // - // const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // - // const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // 
HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // - // const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // - // const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto 
Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 - // const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // - // const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // 
const auto Wz_3_1_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // - // const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // - // const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // - // const real_t Qdzdt = coeff * inv_dt * dxp_r_3; - // - // J_acc(ix_min, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_0_1; - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_0_2; - // J_acc(ix_min, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_0_3; - // // - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_1_1; - // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_1_2; - // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_1_3; - // // - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, 
cur::jx3) += Qdzdt * Wz_0_2_1; - // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_2_2; - // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_2_3; - // // - // J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_0_3_0; - // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_3_1; - // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_3_2; - // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_3_3; - // // - // // - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_0_1; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_0_2; - // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_0_3; - // // - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_1_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_1_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_1_3; - // // - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_2_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_2_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_2_3; - // // - // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_1_3_0; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_3_1; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_3_2; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_3_3; - // // - // // - // J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_0_1; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_0_2; - // J_acc(ix_min + 2, iy_min, 
iz_min + 3, cur::jx3) += Qdzdt * Wz_2_0_3; - // // - // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_1_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_1_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_1_3; - // // - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_2_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_2_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_2_3; - // // - // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_2_3_0; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_3_1; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_3_2; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_3_3; - // // - // // - // J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_3_0_0; - // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_0_1; - // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_0_2; - // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_0_3; - // // - // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_3_1_0; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_1_1; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_1_2; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_1_3; - // // - // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_3_2_0; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_2_1; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_2_2; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_2_3; - // // - // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += 
Qdzdt * Wz_3_3_0; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_3_1; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_3_2; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_3_3; - } // dimension - } else { // order - raise::KernelError(HERE, "Unsupported interpolation order"); - } - } - }; -} // namespace kernel - -#undef i_di_to_Xi - -#endif // KERNELS_CURRENTS_DEPOSIT_HPP + \ No newline at end of file From 51a4f69d0cfdec84d73332a43ae0dbae4c42e38e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 14 May 2025 18:38:41 -0500 Subject: [PATCH 019/154] fix comment --- src/kernels/currents_deposit.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 761ae8ab7..bccd8bace 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -61,8 +61,8 @@ namespace kernel { We need to find which indices are contributing to the shape function For this we first compute the indices of the particle position - Let x be the particle position at the current timestep - Let * be the particle position at the previous timestep + Let * be the particle position at the current timestep + Let x be the particle position at the previous timestep (-1) 0 1 2 3 From cae1eef0be99c0dd8ab439aca9770eac26a32a66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 15 May 2025 19:07:55 -0500 Subject: [PATCH 020/154] fix accidental deletion of commented lines --- src/kernels/currents_deposit.hpp | 469 ++++++++++++++++++++++++++++++- 1 file changed, 468 insertions(+), 1 deletion(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index bccd8bace..5f2a6e05e 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -1275,4 +1275,471 @@ namespace kernel { // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * // (S0x_2 * 
S0z_0 + S1x_2 * S1z_0 + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - \ No newline at end of file + // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * 
(S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_3_1 = THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const real_t Qdydt = coeff * inv_dt * dxp_r_2; + // + // J_acc(ix_min, iy_min, iz_min, cur::jx2) += Qdydt * Wy_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_0_0_1; + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_0_0_2; + // J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_0_0_3; + // // + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_0_1_1; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_0_1_2; + // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_0_1_3; + // // + // J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_0_2_0; + // 
J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_0_2_1; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_0_2_2; + // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_0_2_3; + // // + // J_acc(ix_min, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_0_3_0; + // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_0_3_1; + // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_0_3_2; + // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_0_3_3; + // // + // // + // J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += Qdydt * Wy_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_1_0_1; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_1_0_2; + // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_1_0_3; + // // + // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_1_1_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_1_1_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_1_1_3; + // // + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_1_2_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_1_2_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_1_2_3; + // // + // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_1_3_0; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_1_3_1; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_1_3_2; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_1_3_3; + // // + // // + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += Qdydt * Wy_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_2_0_1; + // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += Qdydt * 
Wy_2_0_2; + // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_2_0_3; + // // + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_2_1_0; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_2_1_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_2_1_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_2_1_3; + // // + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_2_2_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_2_2_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_2_2_3; + // // + // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_2_3_0; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_2_3_1; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_2_3_2; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_2_3_3; + // // + // // + // J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += Qdydt * Wy_3_0_0; + // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_3_0_1; + // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_3_0_2; + // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_3_0_3; + // // + // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_3_1_0; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_3_1_1; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_3_1_2; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_3_1_3; + // // + // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_3_2_0; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_3_2_1; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_3_2_2; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_3_2_3; + // // + // J_acc(ix_min + 
3, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_3_3_0; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_3_3_1; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_3_3_2; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_3_3_3; + // + // /* + // z - component + // */ + // const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const auto Wz_0_0_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // + // const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // + // const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // + // const 
auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // + // // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 + // const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // const auto Wz_1_0_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // + // const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // + // const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + 
// const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // const auto Wz_1_2_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // + // const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // + // const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * + // 
(S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 + // const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // + // const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) 
* + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // + // const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // + // const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_3 = THIRD * (S1z_3 - S0z_3) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // + // const real_t Qdzdt = coeff * inv_dt * dxp_r_3; + // + // J_acc(ix_min, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_0_1; + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_0_2; + // J_acc(ix_min, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_0_3; + // // + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_1_1; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_1_2; + // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_1_3; + // // + // J_acc(ix_min, 
iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_0_2_0; + // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_2_1; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_2_2; + // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_2_3; + // // + // J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_0_3_0; + // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_3_1; + // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_3_2; + // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_3_3; + // // + // // + // J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_0_1; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_0_2; + // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_0_3; + // // + // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_1_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_1_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_1_3; + // // + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_2_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_2_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_2_3; + // // + // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_1_3_0; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_3_1; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_3_2; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_3_3; + // // + // // + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_0_1; + // 
J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_0_2; + // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_0_3; + // // + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_2_1_0; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_1_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_1_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_1_3; + // // + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_2_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_2_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_2_3; + // // + // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_2_3_0; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_3_1; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_3_2; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_3_3; + // // + // // + // J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_3_0_0; + // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_0_1; + // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_0_2; + // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_0_3; + // // + // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_3_1_0; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_1_1; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_1_2; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_1_3; + // // + // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_3_2_0; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_2_1; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_2_2; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, 
cur::jx3) += Qdzdt * Wz_3_2_3; + // // + // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_3_3_0; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_3_1; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_3_2; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_3_3; + } // dimension + } else { // order + raise::KernelError(HERE, "Unsupported interpolation order"); + } + } + }; +} // namespace kernel + +#undef i_di_to_Xi + +#endif // KERNELS_CURRENTS_DEPOSIT_HPP \ No newline at end of file From b87500e842528335d5b5e48636d46681fbaf7460 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Fri, 16 May 2025 10:54:57 -0500 Subject: [PATCH 021/154] fix in y current deposit --- src/kernels/currents_deposit.hpp | 48 +++++++++++++++++--------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 5f2a6e05e..0ea467743 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -124,7 +124,7 @@ namespace kernel { S1_1 = static_cast(0.75) - SQR(ONE - dx_diff); S1_2 = HALF * SQR(HALF - dx_diff); S1_3 = ZERO; - } else { + } else if (shift_x == 0) { /* (-1) 0 1 2 3 ___________________________________ @@ -142,6 +142,8 @@ namespace kernel { S1_1 = static_cast(0.75) - SQR(ONE - dx_diff); S1_2 = HALF * SQR(HALF - dx_diff); S1_3 = ZERO; + } else { + raise::Error("Invalid shift in indices", HERE); } } @@ -652,11 +654,11 @@ namespace kernel { static_cast(i2(p) == i2_prev(p) - 1) * static_cast((1 - dx2(p)) + dx2_prev(p)); - const real_t Qdxdt = -coeff * inv_dt * delta_x; - const real_t Qdydt = -coeff * inv_dt * delta_y; - const real_t QVz = -coeff * vp[2]; + const real_t Qdxdt = coeff * inv_dt * delta_x; + const real_t Qdydt = coeff * inv_dt * delta_y; + const real_t QVz = coeff * vp[2]; - // Esirkepov - Eq. 32 + // Esirkepov - Eq. 
39 // x-component const auto jx_local_0_0 = -Qdxdt * Wx_0_0; const auto jx_local_1_0 = jx_local_0_0 - Qdxdt * Wx_1_0; @@ -680,24 +682,24 @@ namespace kernel { // y-component const auto jy_local_0_0 = -Qdydt * Wy_0_0; - const auto jy_local_1_0 = jy_local_0_0 - Qdydt * Wy_1_0; - const auto jy_local_2_0 = jy_local_1_0 - Qdydt * Wy_2_0; - const auto jy_local_3_0 = jy_local_2_0 - Qdydt * Wy_3_0; - - const auto jy_local_0_1 = -Qdydt * Wy_0_1; - const auto jy_local_1_1 = jy_local_0_1 - Qdydt * Wy_1_1; - const auto jy_local_2_1 = jy_local_1_1 - Qdydt * Wy_2_1; - const auto jy_local_3_1 = jy_local_2_1 - Qdydt * Wy_3_1; - - const auto jy_local_0_2 = -Qdydt * Wy_0_2; - const auto jy_local_1_2 = jy_local_0_2 - Qdydt * Wy_1_2; - const auto jy_local_2_2 = jy_local_1_2 - Qdydt * Wy_2_2; - const auto jy_local_3_2 = jy_local_2_2 - Qdydt * Wy_3_2; - - const auto jy_local_0_3 = -Qdydt * Wy_0_3; - const auto jy_local_1_3 = jy_local_0_3 - Qdydt * Wy_1_3; - const auto jy_local_2_3 = jy_local_1_3 - Qdydt * Wy_2_3; - const auto jy_local_3_3 = jy_local_2_3 - Qdydt * Wy_3_3; + const auto jy_local_0_1 = jy_local_0_0 - Qdydt * Wy_0_1; + const auto jy_local_0_2 = jy_local_0_1 - Qdydt * Wy_0_2; + const auto jy_local_0_3 = jy_local_0_2 - Qdydt * Wy_0_3; + + const auto jy_local_1_0 = -Qdydt * Wy_1_0; + const auto jy_local_1_1 = jy_local_1_0 - Qdydt * Wy_1_1; + const auto jy_local_1_2 = jy_local_1_1 - Qdydt * Wy_1_2; + const auto jy_local_1_3 = jy_local_1_2 - Qdydt * Wy_1_3; + + const auto jy_local_2_0 = -Qdydt * Wy_2_0; + const auto jy_local_2_1 = jy_local_2_0 - Qdydt * Wy_2_1; + const auto jy_local_2_2 = jy_local_2_1 - Qdydt * Wy_2_2; + const auto jy_local_2_3 = jy_local_2_2 - Qdydt * Wy_2_3; + + const auto jy_local_3_0 = -Qdydt * Wy_3_0; + const auto jy_local_3_1 = jy_local_3_0 - Qdydt * Wy_3_1; + const auto jy_local_3_2 = jy_local_3_1 - Qdydt * Wy_3_2; + const auto jy_local_3_3 = jy_local_3_2 - Qdydt * Wy_3_3; /* Current update From f56afef035d075cac1f92f750581624495ba40da Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Fri, 16 May 2025 14:11:47 -0500 Subject: [PATCH 022/154] bugfix in parameter access --- src/engines/srpic.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 91c84f657..850355a65 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -508,7 +508,7 @@ namespace ntt { void CurrentsDeposit(domain_t& domain) { auto scatter_cur = Kokkos::Experimental::create_scatter_view( domain.fields.cur); - auto shape_order = params.template get("algorithms.deposit.order"); + auto shape_order = m_params.template get("algorithms.deposit.order"); for (auto& species : domain.species) { if ((species.pusher() == PrtlPusher::NONE) or (species.npart() == 0) or cmp::AlmostZero_host(species.charge())) { From 63dc8a9949d6048e1d525cb4a12397c6ebaa14a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 21 May 2025 09:16:07 -0500 Subject: [PATCH 023/154] updates to J update indexing --- src/kernels/currents_deposit.hpp | 227 +++++++++++++++---------------- 1 file changed, 111 insertions(+), 116 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 0ea467743..be59328d8 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -52,6 +52,7 @@ namespace kernel { real_t& S1_2, real_t& S1_3, ncells_t& i_min, + real_t& update_x2, const index_t& i, const real_t& dx, const index_t& i_prev, @@ -79,6 +80,7 @@ namespace kernel { const auto dx_less_half = static_cast(dx < static_cast(0.5)); const auto dx_prev_less_half = static_cast( dx_prev < static_cast(0.5)); + const auto shift_x { (i - i_prev) - (dx_less_half - dx_prev_less_half) }; const real_t dx_prev_diff = static_cast(dx_prev) + @@ -95,7 +97,8 @@ namespace kernel { | | x | x* | x* | * | // shift_i = 1 |______|______|______|______|______| */ - i_min = i_prev - dx_prev_less_half + N_GHOSTS; + i_min = i_prev - dx_prev_less_half + 
N_GHOSTS; + update_x2 = ONE; S0_0 = HALF * SQR(static_cast(1.5) - dx_prev_diff); S0_1 = static_cast(0.75) - SQR(ONE - dx_prev_diff); @@ -113,7 +116,8 @@ namespace kernel { | * | x* | x* | x | | // shift_i = -1 |______|______|______|______|______| */ - i_min = i - dx_less_half + N_GHOSTS; + i_min = i - dx_less_half + N_GHOSTS; + update_x2 = ONE; S0_0 = ZERO; S0_1 = HALF * SQR(static_cast(1.5) - dx_prev_diff); @@ -131,7 +135,8 @@ namespace kernel { | | x* | x* | x* | | // shift_i = 0 |______|______|______|______|______| */ - i_min = i - dx_less_half + N_GHOSTS; + i_min = i - dx_less_half + N_GHOSTS; + update_x2 = ZERO; S0_0 = HALF * SQR(static_cast(1.5) - dx_prev_diff); S0_1 = static_cast(0.75) - SQR(ONE - dx_prev_diff); @@ -513,20 +518,15 @@ namespace kernel { real_t S1x_0, S1x_1, S1x_2, S1x_3; // indices of the shape function ncells_t ix_min; + real_t update_x2; // find indices and define shape function - shape_function_2nd(S0x_0, - S0x_1, - S0x_2, - S0x_3, - S1x_0, - S1x_1, - S1x_2, - S1x_3, - ix_min, - i1(p), - dx1(p), - i1_prev(p), - dx1_prev(p)); + // clang-format off + shape_function_2nd(S0x_0, S0x_1, S0x_2, S0x_3, + S1x_0, S1x_1, S1x_2, S1x_3, + ix_min, update_x2, + i1(p), dx1(p), + i1_prev(p), dx1_prev(p)); + // clang-format on if constexpr (D == Dim::_1D) { // ToDo @@ -542,21 +542,16 @@ namespace kernel { real_t S1y_0, S1y_1, S1y_2, S1y_3; // indices of the shape function ncells_t iy_min; + real_t update_y2; // find indices and define shape function - shape_function_2nd(S0y_0, - S0y_1, - S0y_2, - S0y_3, - S1y_0, - S1y_1, - S1y_2, - S1y_3, - iy_min, - i2(p), - dx2(p), - i2_prev(p), - dx2_prev(p)); - + // clang-format off + shape_function_2nd(S0y_0, S0y_1, S0y_2, S0y_3, + S1y_0, S1y_1, S1y_2, S1y_3, + iy_min, update_y2, + i2(p), dx2(p), + i2_prev(p), dx2_prev(p)); + // clang-format on + // Esirkepov 2001, Eq. 
39 /* x - component @@ -654,52 +649,52 @@ namespace kernel { static_cast(i2(p) == i2_prev(p) - 1) * static_cast((1 - dx2(p)) + dx2_prev(p)); - const real_t Qdxdt = coeff * inv_dt * delta_x; - const real_t Qdydt = coeff * inv_dt * delta_y; - const real_t QVz = coeff * vp[2]; + const real_t Qdxdt = -coeff; // * inv_dt * delta_x; + const real_t Qdydt = -coeff; // * inv_dt * delta_y; + const real_t QVz = -coeff * vp[2]; // Esirkepov - Eq. 39 // x-component - const auto jx_local_0_0 = -Qdxdt * Wx_0_0; - const auto jx_local_1_0 = jx_local_0_0 - Qdxdt * Wx_1_0; - const auto jx_local_2_0 = jx_local_1_0 - Qdxdt * Wx_2_0; - const auto jx_local_3_0 = jx_local_2_0 - Qdxdt * Wx_3_0; - - const auto jx_local_0_1 = -Qdxdt * Wx_0_1; - const auto jx_local_1_1 = jx_local_0_1 - Qdxdt * Wx_1_1; - const auto jx_local_2_1 = jx_local_1_1 - Qdxdt * Wx_2_1; - const auto jx_local_3_1 = jx_local_2_1 - Qdxdt * Wx_3_1; - - const auto jx_local_0_2 = -Qdxdt * Wx_0_2; - const auto jx_local_1_2 = jx_local_0_2 - Qdxdt * Wx_1_2; - const auto jx_local_2_2 = jx_local_1_2 - Qdxdt * Wx_2_2; - const auto jx_local_3_2 = jx_local_2_2 - Qdxdt * Wx_3_2; - - const auto jx_local_0_3 = -Qdxdt * Wx_0_3; - const auto jx_local_1_3 = jx_local_0_3 - Qdxdt * Wx_1_3; - const auto jx_local_2_3 = jx_local_1_3 - Qdxdt * Wx_2_3; - const auto jx_local_3_3 = jx_local_2_3 - Qdxdt * Wx_3_3; + const auto jx_0_0 = -Qdxdt * Wx_0_0; + const auto jx_1_0 = jx_0_0 - Qdxdt * Wx_1_0; + const auto jx_2_0 = jx_1_0 - Qdxdt * Wx_2_0; + const auto jx_3_0 = jx_2_0 - Qdxdt * Wx_3_0; + + const auto jx_0_1 = -Qdxdt * Wx_0_1; + const auto jx_1_1 = jx_0_1 - Qdxdt * Wx_1_1; + const auto jx_2_1 = jx_1_1 - Qdxdt * Wx_2_1; + const auto jx_3_1 = jx_2_1 - Qdxdt * Wx_3_1; + + const auto jx_0_2 = -Qdxdt * Wx_0_2; + const auto jx_1_2 = jx_0_2 - Qdxdt * Wx_1_2; + const auto jx_2_2 = jx_1_2 - Qdxdt * Wx_2_2; + const auto jx_3_2 = jx_2_2 - Qdxdt * Wx_3_2; + + const auto jx_0_3 = -Qdxdt * Wx_0_3; + const auto jx_1_3 = jx_0_3 - Qdxdt * Wx_1_3; + const auto 
jx_2_3 = jx_1_3 - Qdxdt * Wx_2_3; + const auto jx_3_3 = jx_2_3 - Qdxdt * Wx_3_3; // y-component - const auto jy_local_0_0 = -Qdydt * Wy_0_0; - const auto jy_local_0_1 = jy_local_0_0 - Qdydt * Wy_0_1; - const auto jy_local_0_2 = jy_local_0_1 - Qdydt * Wy_0_2; - const auto jy_local_0_3 = jy_local_0_2 - Qdydt * Wy_0_3; - - const auto jy_local_1_0 = -Qdydt * Wy_1_0; - const auto jy_local_1_1 = jy_local_1_0 - Qdydt * Wy_1_1; - const auto jy_local_1_2 = jy_local_1_1 - Qdydt * Wy_1_2; - const auto jy_local_1_3 = jy_local_1_2 - Qdydt * Wy_1_3; - - const auto jy_local_2_0 = -Qdydt * Wy_2_0; - const auto jy_local_2_1 = jy_local_2_0 - Qdydt * Wy_2_1; - const auto jy_local_2_2 = jy_local_2_1 - Qdydt * Wy_2_2; - const auto jy_local_2_3 = jy_local_2_2 - Qdydt * Wy_2_3; - - const auto jy_local_3_0 = -Qdydt * Wy_3_0; - const auto jy_local_3_1 = jy_local_3_0 - Qdydt * Wy_3_1; - const auto jy_local_3_2 = jy_local_3_1 - Qdydt * Wy_3_2; - const auto jy_local_3_3 = jy_local_3_2 - Qdydt * Wy_3_3; + const auto jy_0_0 = -Qdydt * Wy_0_0; + const auto jy_0_1 = jy_0_0 - Qdydt * Wy_0_1; + const auto jy_0_2 = jy_0_1 - Qdydt * Wy_0_2; + const auto jy_0_3 = jy_0_2 - Qdydt * Wy_0_3; + + const auto jy_1_0 = -Qdydt * Wy_1_0; + const auto jy_1_1 = jy_1_0 - Qdydt * Wy_1_1; + const auto jy_1_2 = jy_1_1 - Qdydt * Wy_1_2; + const auto jy_1_3 = jy_1_2 - Qdydt * Wy_1_3; + + const auto jy_2_0 = -Qdydt * Wy_2_0; + const auto jy_2_1 = jy_2_0 - Qdydt * Wy_2_1; + const auto jy_2_2 = jy_2_1 - Qdydt * Wy_2_2; + const auto jy_2_3 = jy_2_2 - Qdydt * Wy_2_3; + + const auto jy_3_0 = -Qdydt * Wy_3_0; + const auto jy_3_1 = jy_3_0 - Qdydt * Wy_3_1; + const auto jy_3_2 = jy_3_1 - Qdydt * Wy_3_2; + const auto jy_3_3 = jy_3_2 - Qdydt * Wy_3_3; /* Current update @@ -709,48 +704,48 @@ namespace kernel { /* x - component */ - J_acc(ix_min, iy_min, cur::jx1) += jx_local_0_0; - J_acc(ix_min, iy_min + 1, cur::jx1) += jx_local_0_1; - J_acc(ix_min, iy_min + 2, cur::jx1) += jx_local_0_2; - J_acc(ix_min, iy_min + 3, cur::jx1) += 
jx_local_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx1) += jx_local_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx1) += jx_local_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx1) += jx_local_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx1) += jx_local_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx1) += jx_local_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx1) += jx_local_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += jx_local_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx1) += jx_local_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx1) += jx_local_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx1) += jx_local_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx1) += jx_local_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx1) += jx_local_3_3; + J_acc(ix_min, iy_min, cur::jx1) += jx_0_0; + J_acc(ix_min, iy_min + 1, cur::jx1) += jx_0_1; + J_acc(ix_min, iy_min + 2, cur::jx1) += jx_0_2; + J_acc(ix_min, iy_min + 3, cur::jx1) += update_y2 * jx_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx1) += jx_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx1) += jx_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx1) += jx_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx1) += update_y2 * jx_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx1) += update_x2 * jx_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx1) += update_x2 * jx_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += update_x2 * jx_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx1) += update_x2 * update_y2 * jx_2_3; + + // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x3 * jx_3_0; + // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x3 * jx_3_1; + // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x3 * jx_3_2; + // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x3 * jx_3_3; /* y - component */ - J_acc(ix_min, iy_min, cur::jx2) += jy_local_0_0; - J_acc(ix_min, iy_min + 1, cur::jx2) += jy_local_0_1; - J_acc(ix_min, iy_min + 2, cur::jx2) += jy_local_0_2; - J_acc(ix_min, iy_min + 3, cur::jx2) += jy_local_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx2) += jy_local_1_0; - J_acc(ix_min + 1, 
iy_min + 1, cur::jx2) += jy_local_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += jy_local_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx2) += jy_local_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx2) += jy_local_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx2) += jy_local_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += jy_local_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx2) += jy_local_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx2) += jy_local_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += jy_local_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += jy_local_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx2) += jy_local_3_3; + J_acc(ix_min, iy_min, cur::jx2) += jy_0_0; + J_acc(ix_min + 1, iy_min, cur::jx2) += jy_1_0; + J_acc(ix_min + 2, iy_min, cur::jx2) += jy_2_0; + J_acc(ix_min + 3, iy_min, cur::jx2) += update_x2 * jy_3_0; + + J_acc(ix_min, iy_min + 1, cur::jx2) += jy_0_1; + J_acc(ix_min + 1, iy_min + 1, cur::jx2) += jy_1_1; + J_acc(ix_min + 2, iy_min + 1, cur::jx2) += jy_2_1; + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += update_x2 * jy_3_1; + + J_acc(ix_min, iy_min + 2, cur::jx2) += update_y2 * jy_0_2; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += update_y2 * jy_1_2; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += update_y2 * jy_2_2; + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += update_y2 * update_x2 * jy_3_2; + + // J_acc(ix_min, iy_min + 3, cur::jx2) += update_y3 * jy_0_3; + // J_acc(ix_min + 1, iy_min + 3, cur::jx2) += update_y3 * jy_1_3; + // J_acc(ix_min + 2, iy_min + 3, cur::jx2) += update_y3 * jy_2_3; + // J_acc(ix_min + 3, iy_min + 3, cur::jx2) += update_x3 * jy_3_3; /* z - component, simulated direction @@ -758,22 +753,22 @@ namespace kernel { J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; - J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; + J_acc(ix_min, iy_min + 3, cur::jx3) += update_y2 * QVz * Wz_0_3; J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; 
J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; + J_acc(ix_min + 1, iy_min + 3, cur::jx3) += update_y2 * QVz * Wz_1_3; J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; + J_acc(ix_min + 2, iy_min + 3, cur::jx3) += update_y2 * QVz * Wz_2_3; - J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; + J_acc(ix_min + 3, iy_min, cur::jx3) += update_x2 * QVz * Wz_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx3) += update_x2 * QVz * Wz_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx3) += update_x2 * QVz * Wz_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx3) += update_x2 * update_y2 * QVz * Wz_3_3; } else if constexpr (D == Dim::_3D) { // /* From 822cb9652aa3943faf632b6e49659c28ab130f3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 21 May 2025 12:24:50 -0500 Subject: [PATCH 024/154] fixed shift calculation --- src/kernels/currents_deposit.hpp | 90 ++++++++++++++++---------------- 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index be59328d8..ace98f134 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -52,11 +52,11 @@ namespace kernel { real_t& S1_2, real_t& S1_3, ncells_t& i_min, - real_t& update_x2, + real_t& update_i2, const index_t& i, - const real_t& dx, + const real_t& di, const index_t& i_prev, - const real_t& dx_prev) const { + const real_t& di_prev) const { /* Shape function per particle is a 4 element array. 
We need to find which indices are contributing to the shape function @@ -77,17 +77,18 @@ namespace kernel { */ // find shift in indices - const auto dx_less_half = static_cast(dx < static_cast(0.5)); - const auto dx_prev_less_half = static_cast( - dx_prev < static_cast(0.5)); + const int di_less_half = static_cast(di < static_cast(0.5)); + const int di_prev_less_half = static_cast( + di_prev < static_cast(0.5)); - const auto shift_x { (i - i_prev) - (dx_less_half - dx_prev_less_half) }; + const auto shift_x = (i - di_less_half) - (i_prev - di_prev_less_half); - const real_t dx_prev_diff = static_cast(dx_prev) + - static_cast( - dx_prev < static_cast(0.5)); - const real_t dx_diff = static_cast(dx) + - static_cast(dx < static_cast(0.5)); + // find the minimum index of the shape function + i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); + + // center index of the shape function + const auto i_center_prev = static_cast(i_min + 1 - i_prev); + const auto i_center = static_cast(i_min + 1 - i); // find indices and define shape function if (shift_x > 0) { @@ -97,18 +98,17 @@ namespace kernel { | | x | x* | x* | * | // shift_i = 1 |______|______|______|______|______| */ - i_min = i_prev - dx_prev_less_half + N_GHOSTS; - update_x2 = ONE; + update_i2 = ONE; - S0_0 = HALF * SQR(static_cast(1.5) - dx_prev_diff); - S0_1 = static_cast(0.75) - SQR(ONE - dx_prev_diff); - S0_2 = HALF * SQR(HALF - dx_prev_diff); + S0_0 = HALF * SQR(HALF + (i_center_prev - di_prev)); + S0_1 = static_cast(0.75) - SQR(i_center_prev - di_prev); + S0_2 = HALF * SQR(HALF - (i_center_prev - di_prev)); S0_3 = ZERO; S1_0 = ZERO; - S1_1 = HALF * SQR(static_cast(1.5) - dx_diff); - S1_2 = static_cast(0.75) - SQR(ONE - dx_diff); - S1_3 = HALF * SQR(HALF - dx_diff); + S1_1 = HALF * SQR(HALF + (i_center - di)); + S1_2 = static_cast(0.75) - SQR(i_center - di); + S1_3 = HALF * SQR(HALF - (i_center - di)); } else if (shift_x < 0) { /* (-1) 0 1 2 3 @@ -116,17 +116,16 @@ namespace kernel { | * | x* 
| x* | x | | // shift_i = -1 |______|______|______|______|______| */ - i_min = i - dx_less_half + N_GHOSTS; - update_x2 = ONE; + update_i2 = ONE; S0_0 = ZERO; - S0_1 = HALF * SQR(static_cast(1.5) - dx_prev_diff); - S0_2 = static_cast(0.75) - SQR(ONE - dx_prev_diff); - S0_3 = HALF * SQR(HALF - dx_prev_diff); + S0_1 = HALF * SQR(HALF + (i_center_prev - di_prev)); + S0_2 = static_cast(0.75) - SQR(i_center_prev - di_prev); + S0_3 = HALF * SQR(HALF - (i_center_prev - di_prev)); - S1_0 = HALF * SQR(static_cast(1.5) - dx_diff); - S1_1 = static_cast(0.75) - SQR(ONE - dx_diff); - S1_2 = HALF * SQR(HALF - dx_diff); + S1_0 = HALF * SQR(HALF + (i_center - di)); + S1_1 = static_cast(0.75) - SQR(i_center - di); + S1_2 = HALF * SQR(HALF - (i_center - di)); S1_3 = ZERO; } else if (shift_x == 0) { /* @@ -135,21 +134,23 @@ namespace kernel { | | x* | x* | x* | | // shift_i = 0 |______|______|______|______|______| */ - i_min = i - dx_less_half + N_GHOSTS; - update_x2 = ZERO; + update_i2 = ZERO; - S0_0 = HALF * SQR(static_cast(1.5) - dx_prev_diff); - S0_1 = static_cast(0.75) - SQR(ONE - dx_prev_diff); - S0_2 = HALF * SQR(HALF - dx_prev_diff); + S0_0 = HALF * SQR(HALF + (i_center_prev - di_prev)); + S0_1 = static_cast(0.75) - SQR(i_center_prev - di_prev); + S0_2 = HALF * SQR(HALF - (i_center_prev - di_prev)); S0_3 = ZERO; - S1_0 = HALF * SQR(static_cast(1.5) - dx_diff); - S1_1 = static_cast(0.75) - SQR(ONE - dx_diff); - S1_2 = HALF * SQR(HALF - dx_diff); + S1_0 = HALF * SQR(HALF + (i_center - di)); + S1_1 = static_cast(0.75) - SQR(i_center - di); + S1_2 = HALF * SQR(HALF - (i_center - di)); S1_3 = ZERO; } else { raise::Error("Invalid shift in indices", HERE); } + + // account for ghost cells here to shorten J update expression + i_min += N_GHOSTS; } public: @@ -551,7 +552,7 @@ namespace kernel { i2(p), dx2(p), i2_prev(p), dx2_prev(p)); // clang-format on - + // Esirkepov 2001, Eq. 
39 /* x - component @@ -649,8 +650,8 @@ namespace kernel { static_cast(i2(p) == i2_prev(p) - 1) * static_cast((1 - dx2(p)) + dx2_prev(p)); - const real_t Qdxdt = -coeff; // * inv_dt * delta_x; - const real_t Qdydt = -coeff; // * inv_dt * delta_y; + const real_t Qdxdt = -coeff; + const real_t Qdydt = -coeff; const real_t QVz = -coeff * vp[2]; // Esirkepov - Eq. 39 @@ -719,10 +720,10 @@ namespace kernel { J_acc(ix_min + 2, iy_min + 2, cur::jx1) += update_x2 * jx_2_2; J_acc(ix_min + 2, iy_min + 3, cur::jx1) += update_x2 * update_y2 * jx_2_3; - // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x3 * jx_3_0; - // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x3 * jx_3_1; - // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x3 * jx_3_2; - // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x3 * jx_3_3; + // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x2 * jx_3_0; + // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x2 * jx_3_1; + // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x2 * jx_3_2; + // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x2 * jx_3_3; /* y - component @@ -768,7 +769,8 @@ namespace kernel { J_acc(ix_min + 3, iy_min, cur::jx3) += update_x2 * QVz * Wz_3_0; J_acc(ix_min + 3, iy_min + 1, cur::jx3) += update_x2 * QVz * Wz_3_1; J_acc(ix_min + 3, iy_min + 2, cur::jx3) += update_x2 * QVz * Wz_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx3) += update_x2 * update_y2 * QVz * Wz_3_3; + J_acc(ix_min + 3, iy_min + 3, cur::jx3) += update_x2 * update_y2 * + QVz * Wz_3_3; } else if constexpr (D == Dim::_3D) { // /* From 4ebb9944233984a244399a4ca62e21abb2c9d544 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 21 May 2025 15:22:34 -0500 Subject: [PATCH 025/154] bugfixes --- src/kernels/currents_deposit.hpp | 42 +++++++++++--------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index ace98f134..6be220256 100644 --- 
a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -81,17 +81,17 @@ namespace kernel { const int di_prev_less_half = static_cast( di_prev < static_cast(0.5)); - const auto shift_x = (i - di_less_half) - (i_prev - di_prev_less_half); + const auto shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); // find the minimum index of the shape function i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); // center index of the shape function - const auto i_center_prev = static_cast(i_min + 1 - i_prev); - const auto i_center = static_cast(i_min + 1 - i); + const auto i_center_prev = static_cast(1 - di_prev_less_half); + const auto i_center = static_cast(1 - di_less_half); // find indices and define shape function - if (shift_x > 0) { + if (shift_i > 0) { /* (-1) 0 1 2 3 ___________________________________ @@ -109,7 +109,7 @@ namespace kernel { S1_1 = HALF * SQR(HALF + (i_center - di)); S1_2 = static_cast(0.75) - SQR(i_center - di); S1_3 = HALF * SQR(HALF - (i_center - di)); - } else if (shift_x < 0) { + } else if (shift_i < 0) { /* (-1) 0 1 2 3 ___________________________________ @@ -127,7 +127,7 @@ namespace kernel { S1_1 = static_cast(0.75) - SQR(i_center - di); S1_2 = HALF * SQR(HALF - (i_center - di)); S1_3 = ZERO; - } else if (shift_x == 0) { + } else if (shift_i == 0) { /* (-1) 0 1 2 3 ___________________________________ @@ -553,7 +553,7 @@ namespace kernel { i2_prev(p), dx2_prev(p)); // clang-format on - // Esirkepov 2001, Eq. 39 + // Esirkepov 2001, Eq. 
38 /* x - component */ @@ -636,23 +636,9 @@ namespace kernel { const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + S0y_3 * (HALF * S1x_3 + S0x_3)); - const auto delta_x = static_cast(i1(p) == i1_prev(p)) * - static_cast(dx1(p) - dx1_prev(p)) + - static_cast(i1(p) == i1_prev(p) + 1) * - static_cast(dx1(p) + (1 - dx1_prev(p))) + - static_cast(i1(p) == i1_prev(p) - 1) * - static_cast((1 - dx1(p)) + dx1_prev(p)); - - const auto delta_y = static_cast(i2(p) == i2_prev(p)) * - static_cast(dx2(p) - dx2_prev(p)) + - static_cast(i2(p) == i2_prev(p) + 1) * - static_cast(dx2(p) + (1 - dx2_prev(p))) + - static_cast(i2(p) == i2_prev(p) - 1) * - static_cast((1 - dx2(p)) + dx2_prev(p)); - - const real_t Qdxdt = -coeff; - const real_t Qdydt = -coeff; - const real_t QVz = -coeff * vp[2]; + const real_t Qdxdt = -coeff * inv_dt; + const real_t Qdydt = -coeff * inv_dt; + const real_t QVz = -coeff * inv_dt * vp[2]; // Esirkepov - Eq. 39 // x-component @@ -720,10 +706,10 @@ namespace kernel { J_acc(ix_min + 2, iy_min + 2, cur::jx1) += update_x2 * jx_2_2; J_acc(ix_min + 2, iy_min + 3, cur::jx1) += update_x2 * update_y2 * jx_2_3; - // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x2 * jx_3_0; - // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x2 * jx_3_1; - // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x2 * jx_3_2; - // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x2 * jx_3_3; + // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x2 * jx_3_0; + // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x2 * jx_3_1; + // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x2 * jx_3_2; + // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x2 * jx_3_3; /* y - component From 246b3e92bb5af3e923843555cb0d11406d8e5f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 22 May 2025 08:58:18 -0500 Subject: [PATCH 026/154] simplification and bugfix in Wy --- src/kernels/currents_deposit.hpp | 93 ++++++++++++++++---------------- 1 file changed, 47 insertions(+), 
46 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 6be220256..ca18052c8 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -87,8 +87,9 @@ namespace kernel { i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); // center index of the shape function - const auto i_center_prev = static_cast(1 - di_prev_less_half); - const auto i_center = static_cast(1 - di_less_half); + const auto di_center_prev = static_cast(1 - di_prev_less_half) - + di_prev; + const auto di_center = static_cast(1 - di_less_half) - di; // find indices and define shape function if (shift_i > 0) { @@ -100,15 +101,15 @@ namespace kernel { */ update_i2 = ONE; - S0_0 = HALF * SQR(HALF + (i_center_prev - di_prev)); - S0_1 = static_cast(0.75) - SQR(i_center_prev - di_prev); - S0_2 = HALF * SQR(HALF - (i_center_prev - di_prev)); + S0_0 = HALF * SQR(HALF + di_center_prev); + S0_1 = static_cast(0.75) - SQR(di_center_prev); + S0_2 = HALF * SQR(HALF - di_center_prev); S0_3 = ZERO; S1_0 = ZERO; - S1_1 = HALF * SQR(HALF + (i_center - di)); - S1_2 = static_cast(0.75) - SQR(i_center - di); - S1_3 = HALF * SQR(HALF - (i_center - di)); + S1_1 = HALF * SQR(HALF + di_center); + S1_2 = static_cast(0.75) - SQR(di_center); + S1_3 = HALF * SQR(HALF - di_center); } else if (shift_i < 0) { /* (-1) 0 1 2 3 @@ -119,13 +120,13 @@ namespace kernel { update_i2 = ONE; S0_0 = ZERO; - S0_1 = HALF * SQR(HALF + (i_center_prev - di_prev)); - S0_2 = static_cast(0.75) - SQR(i_center_prev - di_prev); - S0_3 = HALF * SQR(HALF - (i_center_prev - di_prev)); + S0_1 = HALF * SQR(HALF + di_center_prev); + S0_2 = static_cast(0.75) - SQR(di_center_prev); + S0_3 = HALF * SQR(HALF - di_center_prev); - S1_0 = HALF * SQR(HALF + (i_center - di)); - S1_1 = static_cast(0.75) - SQR(i_center - di); - S1_2 = HALF * SQR(HALF - (i_center - di)); + S1_0 = HALF * SQR(HALF + di_center); + S1_1 = static_cast(0.75) - SQR(di_center); + S1_2 = HALF * SQR(HALF 
- di_center); S1_3 = ZERO; } else if (shift_i == 0) { /* @@ -136,14 +137,14 @@ namespace kernel { */ update_i2 = ZERO; - S0_0 = HALF * SQR(HALF + (i_center_prev - di_prev)); - S0_1 = static_cast(0.75) - SQR(i_center_prev - di_prev); - S0_2 = HALF * SQR(HALF - (i_center_prev - di_prev)); + S0_0 = HALF * SQR(HALF + di_center_prev); + S0_1 = static_cast(0.75) - SQR(di_center_prev); + S0_2 = HALF * SQR(HALF - di_center_prev); S0_3 = ZERO; - S1_0 = HALF * SQR(HALF + (i_center - di)); - S1_1 = static_cast(0.75) - SQR(i_center - di); - S1_2 = HALF * SQR(HALF - (i_center - di)); + S1_0 = HALF * SQR(HALF + di_center); + S1_1 = static_cast(0.75) - SQR(di_center); + S1_2 = HALF * SQR(HALF - di_center); S1_3 = ZERO; } else { raise::Error("Invalid shift in indices", HERE); @@ -579,25 +580,25 @@ namespace kernel { const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); // Unrolled calculations for Wy - const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S0y_0 - S1y_0); - const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S0y_1 - S1y_1); - const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S0y_2 - S1y_2); - const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S0y_3 - S1y_3); - - const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S0y_0 - S1y_0); - const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S0y_1 - S1y_1); - const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S0y_2 - S1y_2); - const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S0y_3 - S1y_3); - - const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S0y_0 - S1y_0); - const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S0y_1 - S1y_1); - const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S0y_2 - S1y_2); - const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S0y_3 - S1y_3); - - const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S0y_0 - S1y_0); - const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S0y_1 - S1y_1); - const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S0y_2 - S1y_2); - const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S0y_3 - S1y_3); + const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S1y_0 - S0y_0); 
+ const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S1y_1 - S0y_1); + const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S1y_2 - S0y_2); + const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S1y_3 - S0y_3); + + const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S1y_0 - S0y_0); + const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S1y_1 - S0y_1); + const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S1y_2 - S0y_2); + const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S1y_3 - S0y_3); + + const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S1y_0 - S0y_0); + const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S1y_1 - S0y_1); + const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S1y_2 - S0y_2); + const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S1y_3 - S0y_3); + + const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S1y_0 - S0y_0); + const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S1y_1 - S0y_1); + const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S1y_2 - S0y_2); + const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S1y_3 - S0y_3); // Unrolled calculations for Wz const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + @@ -636,9 +637,9 @@ namespace kernel { const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + S0y_3 * (HALF * S1x_3 + S0x_3)); - const real_t Qdxdt = -coeff * inv_dt; - const real_t Qdydt = -coeff * inv_dt; - const real_t QVz = -coeff * inv_dt * vp[2]; + const real_t Qdxdt = coeff * inv_dt; + const real_t Qdydt = coeff * inv_dt; + const real_t QVz = coeff * inv_dt * vp[2]; // Esirkepov - Eq. 
39 // x-component @@ -706,10 +707,10 @@ namespace kernel { J_acc(ix_min + 2, iy_min + 2, cur::jx1) += update_x2 * jx_2_2; J_acc(ix_min + 2, iy_min + 3, cur::jx1) += update_x2 * update_y2 * jx_2_3; - // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x2 * jx_3_0; - // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x2 * jx_3_1; - // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x2 * jx_3_2; - // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x2 * jx_3_3; + // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x2 * jx_3_0; + // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x2 * jx_3_1; + // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x2 * jx_3_2; + // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x2 * jx_3_3; /* y - component From 23011799bbb8026dbcb77d3c9bed139eda52ad0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 22 May 2025 09:20:21 -0500 Subject: [PATCH 027/154] bugfix in case comparison --- src/kernels/currents_deposit.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index ca18052c8..ccfe72fe0 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -92,7 +92,7 @@ namespace kernel { const auto di_center = static_cast(1 - di_less_half) - di; // find indices and define shape function - if (shift_i > 0) { + if (shift_i == 1) { /* (-1) 0 1 2 3 ___________________________________ @@ -110,7 +110,7 @@ namespace kernel { S1_1 = HALF * SQR(HALF + di_center); S1_2 = static_cast(0.75) - SQR(di_center); S1_3 = HALF * SQR(HALF - di_center); - } else if (shift_i < 0) { + } else if (shift_i == -1) { /* (-1) 0 1 2 3 ___________________________________ @@ -128,6 +128,7 @@ namespace kernel { S1_1 = static_cast(0.75) - SQR(di_center); S1_2 = HALF * SQR(HALF - di_center); S1_3 = ZERO; + } else if (shift_i == 0) { /* (-1) 0 1 2 3 From eb8c58e2e88bdeadd80bce06c9c9c7c1ff66a6ca Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 22 May 2025 14:07:18 -0500 Subject: [PATCH 028/154] switch off formatting for large B updates --- src/kernels/faraday_mink.hpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/kernels/faraday_mink.hpp b/src/kernels/faraday_mink.hpp index bea6be93e..cf6844a9d 100644 --- a/src/kernels/faraday_mink.hpp +++ b/src/kernels/faraday_mink.hpp @@ -70,13 +70,15 @@ namespace kernel::mink { Inline void operator()(index_t i1) const { if constexpr (D == Dim::_1D) { - const auto alphax = ONE - THREE * deltax; + const auto alphax = ONE - THREE * deltax; + // clang-format off EB(i1, em::bx2) += coeff1 * ( - + alphax * (EB(i1 + 1, em::ex3) - EB(i1 , em::ex3)) - + deltax * (EB(i1 + 2, em::ex3) - EB(i1 - 1, em::ex3))); + + alphax * (EB(i1 + 1, em::ex3) - EB(i1 , em::ex3)) + + deltax * (EB(i1 + 2, em::ex3) - EB(i1 - 1, em::ex3))); EB(i1, em::bx3) += coeff1 * ( - - alphax * (EB(i1 + 1, em::ex2) - EB(i1 , em::ex2)) - - deltax * (EB(i1 + 2, em::ex2) - EB(i1 - 1, em::ex2))); + - alphax * (EB(i1 + 1, em::ex2) - EB(i1 , em::ex2)) + - deltax * (EB(i1 + 2, em::ex2) - EB(i1 - 1, em::ex2))); + // clang-format on } else { raise::KernelError(HERE, "Faraday_kernel: 1D implementation called for D != 1"); } @@ -86,7 +88,7 @@ namespace kernel::mink { if constexpr (D == Dim::_2D) { const auto alphax = ONE - TWO * betaxy - THREE * deltax; const auto alphay = ONE - TWO * betayx - THREE * deltay; - + // clang-format off EB(i1, i2, em::bx1) += coeff1 * ( - alphay * (EB(i1 , i2 + 1, em::ex3) - EB(i1 , i2 , em::ex3)) - deltay * (EB(i1 , i2 + 2, em::ex3) - EB(i1 , i2 - 1, em::ex3)) @@ -106,7 +108,7 @@ namespace kernel::mink { - deltax * (EB(i1 + 2, i2 , em::ex2) - EB(i1 - 1, i2 , em::ex2)) - betaxy * (EB(i1 + 1, i2 + 1, em::ex2) - EB(i1 , i2 + 1, em::ex2)) - betaxy * (EB(i1 + 1, i2 - 1, em::ex2) - EB(i1 , i2 - 1, em::ex2))); - + // clang-format on } else { raise::KernelError(HERE, "Faraday_kernel: 2D implementation called for D 
!= 2"); } @@ -117,7 +119,7 @@ namespace kernel::mink { const auto alphax = ONE - TWO * betaxy - TWO * betaxz - THREE * deltax; const auto alphay = ONE - TWO * betayx - TWO * betayz - THREE * deltay; const auto alphaz = ONE - TWO * betazx - TWO * betazy - THREE * deltaz; - + // clang-format off EB(i1, i2, i3, em::bx1) += coeff1 * ( + alphaz * (EB(i1 , i2 , i3 + 1, em::ex2) - EB(i1 , i2 , i3 , em::ex2)) + deltaz * (EB(i1 , i2 , i3 + 2, em::ex2) - EB(i1 , i2 , i3 - 1, em::ex2)) @@ -157,6 +159,7 @@ namespace kernel::mink { - betaxy * (EB(i1 + 1, i2 - 1, i3 , em::ex2) - EB(i1 , i2 - 1, i3 , em::ex2)) - betaxz * (EB(i1 + 1, i2 , i3 + 1, em::ex2) - EB(i1 , i2 , i3 + 1, em::ex2)) - betaxz * (EB(i1 + 1, i2 , i3 - 1, em::ex2) - EB(i1 , i2 , i3 - 1, em::ex2))); + // clang-format on } else { raise::KernelError(HERE, "Faraday_kernel: 3D implementation called for D != 3"); } From 03aebb57be793fd35967f11d440145e0b8c31ff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 22 May 2025 17:52:24 -0500 Subject: [PATCH 029/154] optimizations and prep for 3rd order deposit --- src/kernels/currents_deposit.hpp | 575 ++++++++++++++++++++++++++++--- 1 file changed, 523 insertions(+), 52 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index ccfe72fe0..fc87a1b01 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -52,7 +52,7 @@ namespace kernel { real_t& S1_2, real_t& S1_3, ncells_t& i_min, - real_t& update_i2, + bool& update_i2, const index_t& i, const real_t& di, const index_t& i_prev, @@ -81,7 +81,7 @@ namespace kernel { const int di_prev_less_half = static_cast( di_prev < static_cast(0.5)); - const auto shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); + const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); // find the minimum index of the shape function i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); @@ -99,7 +99,7 @@ namespace kernel 
{ | | x | x* | x* | * | // shift_i = 1 |______|______|______|______|______| */ - update_i2 = ONE; + update_i2 = true; S0_0 = HALF * SQR(HALF + di_center_prev); S0_1 = static_cast(0.75) - SQR(di_center_prev); @@ -117,7 +117,7 @@ namespace kernel { | * | x* | x* | x | | // shift_i = -1 |______|______|______|______|______| */ - update_i2 = ONE; + update_i2 = true; S0_0 = ZERO; S0_1 = HALF * SQR(HALF + di_center_prev); @@ -136,7 +136,7 @@ namespace kernel { | | x* | x* | x* | | // shift_i = 0 |______|______|______|______|______| */ - update_i2 = ZERO; + update_i2 = false; S0_0 = HALF * SQR(HALF + di_center_prev); S0_1 = static_cast(0.75) - SQR(di_center_prev); @@ -155,6 +155,143 @@ namespace kernel { i_min += N_GHOSTS; } + Inline void shape_function_3rd(real_t& S0_0, + real_t& S0_1, + real_t& S0_2, + real_t& S0_3, + real_t& S0_4, + real_t& S1_0, + real_t& S1_1, + real_t& S1_2, + real_t& S1_3, + real_t& S1_4, + ncells_t& i_min, + bool& update_i3, + const index_t& i, + const real_t& di, + const index_t& i_prev, + const real_t& di_prev) const { + /* + Cubic shape function per particle is a 4 element array; S0_* (built from i_prev, di_prev) and S1_* (built from i, di) each get 5 slots so that a one-cell shift between the two still fits. Outputs: the S0_*/S1_* weights, i_min (lowest contributing index, already offset by N_GHOSTS) and update_i3 (true when shift_i != 0).
+ We need to find which indices are contributing to the shape function + For this we first compute the indices of the particle position + + Let * be the particle position at the current timestep + Let x be the particle position at the previous timestep + + + (-1) 0 1 2 3 4 + __________________________________________ + | | x* | x* | x* | x* | | // shift_i = 0 + |______|______|______|______|______|______| + | | x | x* | x* | x* | * | // shift_i = 1 + |______|______|______|______|______|______| + | * | x* | x* | x* | x | | // shift_i = -1 + |______|______|______|______|______|______| + */ + + // find shift in indices + const int di_less_half = static_cast(di < static_cast(0.5)); + const int di_prev_less_half = static_cast( + di_prev < static_cast(0.5)); + + const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); + + // find the minimum index of the shape function + i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); + + // center index of the shape function -- NOTE(review): di_center equals -di when di < 0.5, so it can be negative and the outermost cubic weight (HALF * THIRD * di_center3) then becomes negative; confirm the intended stencil alignment + const auto di_center_prev = static_cast(1 - di_prev_less_half) - + di_prev; + const auto di_center_prev2 = SQR(di_center_prev); + const auto di_center_prev3 = di_center_prev2 * di_center_prev; + + const auto di_center = static_cast(1 - di_less_half) - di; + const auto di_center2 = SQR(di_center); + const auto di_center3 = di_center2 * di_center; + + // find indices and define shape function (1/6 and 2/3 are spelled HALF * THIRD and TWO * THIRD: the integer expressions 1 / 6 and 2 / 3 evaluate to 0 before any cast) + if (shift_i == 1) { + /* + (-1) 0 1 2 3 4 + __________________________________________ + | | x | x* | x* | x* | * | // shift_i = 1 + |______|______|______|______|______|______| + */ + update_i3 = true; + + S0_0 = HALF * THIRD * (ONE - di_center_prev3) - + HALF * (di_center_prev - di_center_prev2); + S0_1 = TWO * THIRD - di_center_prev2 + HALF * di_center_prev3; + S0_2 = HALF * THIRD + + HALF * (di_center_prev + di_center_prev2 - di_center_prev3); + S0_3 = HALF * THIRD * di_center_prev3; + S0_4 = ZERO; + + S1_0 = ZERO; + S1_1 = HALF * THIRD * (ONE - di_center3)
- + HALF * (di_center - di_center2); + S1_2 = TWO * THIRD - di_center2 + HALF * di_center3; + S1_3 = HALF * THIRD + + HALF * (di_center + di_center2 - di_center3); + S1_4 = HALF * THIRD * di_center3; + } else if (shift_i == -1) { + /* + (-1) 0 1 2 3 4 + _________________________________________ + | * | x* | x* | x* | x | | // shift_i = -1 + |______|______|______|______|______|_____| + */ + update_i3 = true; + + S0_0 = ZERO; + S0_1 = HALF * THIRD * (ONE - di_center_prev3) - + HALF * (di_center_prev - di_center_prev2); + S0_2 = TWO * THIRD - di_center_prev2 + HALF * di_center_prev3; + S0_3 = HALF * THIRD + + HALF * (di_center_prev + di_center_prev2 - di_center_prev3); + S0_4 = HALF * THIRD * di_center_prev3; + + S1_0 = HALF * THIRD * (ONE - di_center3) - + HALF * (di_center - di_center2); + S1_1 = TWO * THIRD - di_center2 + HALF * di_center3; + S1_2 = HALF * THIRD + + HALF * (di_center + di_center2 - di_center3); + S1_3 = HALF * THIRD * di_center3; + S1_4 = ZERO; + + } else if (shift_i == 0) { + /* + (-1) 0 1 2 3 4 + __________________________________________ + | | x* | x* | x* | x* | | // shift_i = 0 + |______|______|______|______|______|______| + */ + update_i3 = false; + + S0_0 = HALF * THIRD * (ONE - di_center_prev3) - + HALF * (di_center_prev - di_center_prev2); + S0_1 = TWO * THIRD - di_center_prev2 + HALF * di_center_prev3; + S0_2 = HALF * THIRD + + HALF * (di_center_prev + di_center_prev2 - di_center_prev3); + S0_3 = HALF * THIRD * di_center_prev3; + S0_4 = ZERO; + + S1_0 = HALF * THIRD * (ONE - di_center3) - + HALF * (di_center - di_center2); + S1_1 = TWO * THIRD - di_center2 + HALF * di_center3; + S1_2 = HALF * THIRD + + HALF * (di_center + di_center2 - di_center3); + S1_3 = HALF * THIRD * di_center3; + S1_4 = ZERO; + } else { + raise::KernelError(HERE, "Invalid shift in indices"); + } + + // account for ghost cells here to shorten J update
expression + i_min += N_GHOSTS; + } + public: /** * @brief explicit constructor. @@ -521,7 +658,7 @@ namespace kernel { real_t S1x_0, S1x_1, S1x_2, S1x_3; // indices of the shape function ncells_t ix_min; - real_t update_x2; + bool update_x2; // find indices and define shape function // clang-format off shape_function_2nd(S0x_0, S0x_1, S0x_2, S0x_3, @@ -545,7 +682,7 @@ namespace kernel { real_t S1y_0, S1y_1, S1y_2, S1y_3; // indices of the shape function ncells_t iy_min; - real_t update_y2; + bool update_y2; // find indices and define shape function // clang-format off shape_function_2nd(S0y_0, S0y_1, S0y_2, S0y_3, @@ -575,31 +712,22 @@ namespace kernel { const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); - const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); - const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); - const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); - const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); - // Unrolled calculations for Wy const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S1y_0 - S0y_0); const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S1y_1 - S0y_1); const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S1y_2 - S0y_2); - const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S1y_3 - S0y_3); const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S1y_0 - S0y_0); const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S1y_1 - S0y_1); const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S1y_2 - S0y_2); - const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S1y_3 - S0y_3); const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S1y_0 - S0y_0); const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S1y_1 - S0y_1); const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S1y_2 - S0y_2); - const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S1y_3 - S0y_3); const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S1y_0 - S0y_0); const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S1y_1 - S0y_1); const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) *
(S1y_2 - S0y_2); - const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S1y_3 - S0y_3); // Unrolled calculations for Wz const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + @@ -647,43 +775,35 @@ namespace kernel { const auto jx_0_0 = -Qdxdt * Wx_0_0; const auto jx_1_0 = jx_0_0 - Qdxdt * Wx_1_0; const auto jx_2_0 = jx_1_0 - Qdxdt * Wx_2_0; - const auto jx_3_0 = jx_2_0 - Qdxdt * Wx_3_0; const auto jx_0_1 = -Qdxdt * Wx_0_1; const auto jx_1_1 = jx_0_1 - Qdxdt * Wx_1_1; const auto jx_2_1 = jx_1_1 - Qdxdt * Wx_2_1; - const auto jx_3_1 = jx_2_1 - Qdxdt * Wx_3_1; const auto jx_0_2 = -Qdxdt * Wx_0_2; const auto jx_1_2 = jx_0_2 - Qdxdt * Wx_1_2; const auto jx_2_2 = jx_1_2 - Qdxdt * Wx_2_2; - const auto jx_3_2 = jx_2_2 - Qdxdt * Wx_3_2; const auto jx_0_3 = -Qdxdt * Wx_0_3; const auto jx_1_3 = jx_0_3 - Qdxdt * Wx_1_3; const auto jx_2_3 = jx_1_3 - Qdxdt * Wx_2_3; - const auto jx_3_3 = jx_2_3 - Qdxdt * Wx_3_3; // y-component const auto jy_0_0 = -Qdydt * Wy_0_0; const auto jy_0_1 = jy_0_0 - Qdydt * Wy_0_1; const auto jy_0_2 = jy_0_1 - Qdydt * Wy_0_2; - const auto jy_0_3 = jy_0_2 - Qdydt * Wy_0_3; const auto jy_1_0 = -Qdydt * Wy_1_0; const auto jy_1_1 = jy_1_0 - Qdydt * Wy_1_1; const auto jy_1_2 = jy_1_1 - Qdydt * Wy_1_2; - const auto jy_1_3 = jy_1_2 - Qdydt * Wy_1_3; const auto jy_2_0 = -Qdydt * Wy_2_0; const auto jy_2_1 = jy_2_0 - Qdydt * Wy_2_1; const auto jy_2_2 = jy_2_1 - Qdydt * Wy_2_2; - const auto jy_2_3 = jy_2_2 - Qdydt * Wy_2_3; const auto jy_3_0 = -Qdydt * Wy_3_0; const auto jy_3_1 = jy_3_0 - Qdydt * Wy_3_1; const auto jy_3_2 = jy_3_1 - Qdydt * Wy_3_2; - const auto jy_3_3 = jy_3_2 - Qdydt * Wy_3_3; /* Current update @@ -696,22 +816,25 @@ namespace kernel { J_acc(ix_min, iy_min, cur::jx1) += jx_0_0; J_acc(ix_min, iy_min + 1, cur::jx1) += jx_0_1; J_acc(ix_min, iy_min + 2, cur::jx1) += jx_0_2; - J_acc(ix_min, iy_min + 3, cur::jx1) += update_y2 * jx_0_3; J_acc(ix_min + 1, iy_min, cur::jx1) += jx_1_0; J_acc(ix_min + 1, iy_min + 1, cur::jx1) += jx_1_1; J_acc(ix_min + 1, 
iy_min + 2, cur::jx1) += jx_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx1) += update_y2 * jx_1_3; - J_acc(ix_min + 2, iy_min, cur::jx1) += update_x2 * jx_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx1) += update_x2 * jx_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += update_x2 * jx_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx1) += update_x2 * update_y2 * jx_2_3; + if (update_x2) { + J_acc(ix_min + 2, iy_min, cur::jx1) += jx_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx1) += jx_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += jx_2_2; + } + + if (update_y2) { + J_acc(ix_min + 1, iy_min + 3, cur::jx1) += jx_1_3; + J_acc(ix_min, iy_min + 3, cur::jx1) += jx_0_3; + } - // J_acc(ix_min + 3, iy_min, cur::jx1) += update_x2 * jx_3_0; - // J_acc(ix_min + 3, iy_min + 1, cur::jx1) += update_x2 * jx_3_1; - // J_acc(ix_min + 3, iy_min + 2, cur::jx1) += update_x2 * jx_3_2; - // J_acc(ix_min + 3, iy_min + 3, cur::jx1) += update_x2 * jx_3_3; + if (update_x2 && update_y2) { + J_acc(ix_min + 2, iy_min + 3, cur::jx1) += jx_2_3; + } /* y - component @@ -719,46 +842,54 @@ namespace kernel { J_acc(ix_min, iy_min, cur::jx2) += jy_0_0; J_acc(ix_min + 1, iy_min, cur::jx2) += jy_1_0; J_acc(ix_min + 2, iy_min, cur::jx2) += jy_2_0; - J_acc(ix_min + 3, iy_min, cur::jx2) += update_x2 * jy_3_0; J_acc(ix_min, iy_min + 1, cur::jx2) += jy_0_1; J_acc(ix_min + 1, iy_min + 1, cur::jx2) += jy_1_1; J_acc(ix_min + 2, iy_min + 1, cur::jx2) += jy_2_1; - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += update_x2 * jy_3_1; - J_acc(ix_min, iy_min + 2, cur::jx2) += update_y2 * jy_0_2; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += update_y2 * jy_1_2; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += update_y2 * jy_2_2; - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += update_y2 * update_x2 * jy_3_2; + if (update_x2) { + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += jy_3_1; + J_acc(ix_min + 3, iy_min, cur::jx2) += jy_3_0; + } - // J_acc(ix_min, iy_min + 3, cur::jx2) += update_y3 * jy_0_3; - // J_acc(ix_min + 1, iy_min + 3, cur::jx2) 
+= update_y3 * jy_1_3; - // J_acc(ix_min + 2, iy_min + 3, cur::jx2) += update_y3 * jy_2_3; - // J_acc(ix_min + 3, iy_min + 3, cur::jx2) += update_x3 * jy_3_3; + if (update_y2) { + J_acc(ix_min, iy_min + 2, cur::jx2) += jy_0_2; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += jy_1_2; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += jy_2_2; + } + if (update_x2 && update_y2) { + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += jy_3_2; + } /* z - component, simulated direction */ J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; - J_acc(ix_min, iy_min + 3, cur::jx3) += update_y2 * QVz * Wz_0_3; J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx3) += update_y2 * QVz * Wz_1_3; J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx3) += update_y2 * QVz * Wz_2_3; - J_acc(ix_min + 3, iy_min, cur::jx3) += update_x2 * QVz * Wz_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx3) += update_x2 * QVz * Wz_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx3) += update_x2 * QVz * Wz_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx3) += update_x2 * update_y2 * - QVz * Wz_3_3; + if (update_x2) { + J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; + } + + if (update_y2) { + J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; + J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; + J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; + } + if (update_x2 && update_y2) { + J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; + } } else if constexpr (D == Dim::_3D) { // /* @@ 
-1720,6 +1851,346 @@ namespace kernel { // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_3_2; // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_3_3; } // dimension + + } else if constexpr (O == 3u) { + /* + Higher order charge conserving current deposition based on + Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract + + We need to define the follwowing variable: + - Shape functions in spatial directions for the particle position + before and after the current timestep. + S0_*, S1_* + - Density composition matrix + Wx_*, Wy_*, Wz_* + */ + + /* + x - direction + */ + + // shape function at previous timestep + real_t S0x_0, S0x_1, S0x_2, S0x_3, S0x_4; + // shape function at current timestep + real_t S1x_0, S1x_1, S1x_2, S1x_3, S1x_4; + // indices of the shape function + ncells_t ix_min; + bool update_x3; + // find indices and define shape function + // clang-format off + shape_function_3rd(S0x_0, S0x_1, S0x_2, S0x_3, S0x_4, + S1x_0, S1x_1, S1x_2, S1x_3, S1x_4, + ix_min, update_x3, + i1(p), dx1(p), + i1_prev(p), dx1_prev(p)); + // clang-format on + + if constexpr (D == Dim::_1D) { + // ToDo + } else if constexpr (D == Dim::_2D) { + + /* + y - direction + */ + + // shape function at previous timestep + real_t S0y_0, S0y_1, S0y_2, S0y_3, S0y_4; + // shape function at current timestep + real_t S1y_0, S1y_1, S1y_2, S1y_3, S1y_4; + // indices of the shape function + ncells_t iy_min; + bool update_y3; + // find indices and define shape function + // clang-format off + shape_function_3rd(S0y_0, S0y_1, S0y_2, S0y_3, S0y_4, + S1y_0, S1y_1, S1y_2, S1y_3, S1y_4, + iy_min, update_y3, + i2(p), dx2(p), + i2_prev(p), dx2_prev(p)); + // clang-format on + + // Esirkepov 2001, Eq. 
38 + /* + x - component + */ + // Calculate weight function - unrolled + const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); + const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); + const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); + const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); + const auto Wx_0_4 = HALF * (S1x_0 - S0x_0) * (S0y_4 + S1y_4); + + const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); + const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); + const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); + const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); + const auto Wx_1_4 = HALF * (S1x_1 - S0x_1) * (S0y_4 + S1y_4); + + const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); + const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); + const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); + const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); + const auto Wx_2_4 = HALF * (S1x_2 - S0x_2) * (S0y_4 + S1y_4); + + const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); + const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); + const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); + const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); + const auto Wx_3_4 = HALF * (S1x_3 - S0x_3) * (S0y_4 + S1y_4); + + // Unrolled calculations for Wy + const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S1y_0 - S0y_0); + const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S1y_1 - S0y_1); + const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S1y_2 - S0y_2); + const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S1y_3 - S0y_3); + + const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S1y_0 - S0y_0); + const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S1y_1 - S0y_1); + const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S1y_2 - S0y_2); + const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S1y_3 - S0y_3); + + const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S1y_0 - S0y_0); + const auto Wy_2_1 = HALF 
* (S1x_2 + S0x_2) * (S1y_1 - S0y_1); + const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S1y_2 - S0y_2); + const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S1y_3 - S0y_3); + + const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S1y_0 - S0y_0); + const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S1y_1 - S0y_1); + const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S1y_2 - S0y_2); + const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S1y_3 - S0y_3); + + const auto Wy_4_0 = HALF * (S1x_4 + S0x_4) * (S1y_0 - S0y_0); + const auto Wy_4_1 = HALF * (S1x_4 + S0x_4) * (S1y_1 - S0y_1); + const auto Wy_4_2 = HALF * (S1x_4 + S0x_4) * (S1y_2 - S0y_2); + const auto Wy_4_3 = HALF * (S1x_4 + S0x_4) * (S1y_3 - S0y_3); + + // Unrolled calculations for Wz + const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + + S0y_0 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + + S0y_1 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + + S0y_2 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + + S0y_3 * (HALF * S1x_0 + S0x_0)); + const auto Wz_0_4 = THIRD * (S1y_4 * (HALF * S0x_0 + S1x_0) + + S0y_4 * (HALF * S1x_0 + S0x_0)); + + const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + + S0y_0 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_1 = THIRD * (S1y_1 * (HALF * S0x_1 + S1x_1) + + S0y_1 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + + S0y_2 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + + S0y_3 * (HALF * S1x_1 + S0x_1)); + const auto Wz_1_4 = THIRD * (S1y_4 * (HALF * S0x_1 + S1x_1) + + S0y_4 * (HALF * S1x_1 + S0x_1)); + + const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + + S0y_0 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + + S0y_1 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + + S0y_2 * (HALF * S1x_2 + 
S0x_2)); + const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + + S0y_3 * (HALF * S1x_2 + S0x_2)); + const auto Wz_2_4 = THIRD * (S1y_4 * (HALF * S0x_2 + S1x_2) + + S0y_4 * (HALF * S1x_2 + S0x_2)); + + const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + + S0y_0 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_1 = THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + + S0y_1 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + + S0y_2 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + + S0y_3 * (HALF * S1x_3 + S0x_3)); + const auto Wz_3_4 = THIRD * (S1y_4 * (HALF * S0x_3 + S1x_3) + + S0y_4 * (HALF * S1x_3 + S0x_3)); + + const auto Wz_4_0 = THIRD * (S1y_0 * (HALF * S0x_4 + S1x_4) + + S0y_0 * (HALF * S1x_4 + S0x_4)); + const auto Wz_4_1 = THIRD * (S1y_1 * (HALF * S0x_4 + S1x_4) + + S0y_1 * (HALF * S1x_4 + S0x_4)); + const auto Wz_4_2 = THIRD * (S1y_2 * (HALF * S0x_4 + S1x_4) + + S0y_2 * (HALF * S1x_4 + S0x_4)); + const auto Wz_4_3 = THIRD * (S1y_3 * (HALF * S0x_4 + S1x_4) + + S0y_3 * (HALF * S1x_4 + S0x_4)); + const auto Wz_4_4 = THIRD * (S1y_4 * (HALF * S0x_4 + S1x_4) + + S0y_4 * (HALF * S1x_4 + S0x_4)); + + const real_t Qdxdt = coeff * inv_dt; + const real_t Qdydt = coeff * inv_dt; + const real_t QVz = coeff * inv_dt * vp[2]; + + // Esirkepov - Eq. 
39 + // x-component + const auto jx_0_0 = -Qdxdt * Wx_0_0; + const auto jx_1_0 = jx_0_0 - Qdxdt * Wx_1_0; + const auto jx_2_0 = jx_1_0 - Qdxdt * Wx_2_0; + const auto jx_3_0 = jx_2_0 - Qdxdt * Wx_3_0; + + const auto jx_0_1 = -Qdxdt * Wx_0_1; + const auto jx_1_1 = jx_0_1 - Qdxdt * Wx_1_1; + const auto jx_2_1 = jx_1_1 - Qdxdt * Wx_2_1; + const auto jx_3_1 = jx_2_1 - Qdxdt * Wx_3_1; + + const auto jx_0_2 = -Qdxdt * Wx_0_2; + const auto jx_1_2 = jx_0_2 - Qdxdt * Wx_1_2; + const auto jx_2_2 = jx_1_2 - Qdxdt * Wx_2_2; + const auto jx_3_2 = jx_2_2 - Qdxdt * Wx_3_2; + + const auto jx_0_3 = -Qdxdt * Wx_0_3; + const auto jx_1_3 = jx_0_3 - Qdxdt * Wx_1_3; + const auto jx_2_3 = jx_1_3 - Qdxdt * Wx_2_3; + const auto jx_3_3 = jx_2_3 - Qdxdt * Wx_3_3; + + const auto jx_0_4 = -Qdxdt * Wx_0_4; + const auto jx_1_4 = jx_0_4 - Qdxdt * Wx_1_4; + const auto jx_2_4 = jx_1_4 - Qdxdt * Wx_2_4; + const auto jx_3_4 = jx_2_4 - Qdxdt * Wx_3_4; + + // y-component + const auto jy_0_0 = -Qdydt * Wy_0_0; + const auto jy_0_1 = jy_0_0 - Qdydt * Wy_0_1; + const auto jy_0_2 = jy_0_1 - Qdydt * Wy_0_2; + const auto jy_0_3 = jy_0_2 - Qdydt * Wy_0_3; + + const auto jy_1_0 = -Qdydt * Wy_1_0; + const auto jy_1_1 = jy_1_0 - Qdydt * Wy_1_1; + const auto jy_1_2 = jy_1_1 - Qdydt * Wy_1_2; + const auto jy_1_3 = jy_1_2 - Qdydt * Wy_1_3; + + const auto jy_2_0 = -Qdydt * Wy_2_0; + const auto jy_2_1 = jy_2_0 - Qdydt * Wy_2_1; + const auto jy_2_2 = jy_2_1 - Qdydt * Wy_2_2; + const auto jy_2_3 = jy_2_2 - Qdydt * Wy_2_3; + + const auto jy_3_0 = -Qdydt * Wy_3_0; + const auto jy_3_1 = jy_3_0 - Qdydt * Wy_3_1; + const auto jy_3_2 = jy_3_1 - Qdydt * Wy_3_2; + const auto jy_3_3 = jy_3_2 - Qdydt * Wy_3_3; + + const auto jy_4_0 = -Qdydt * Wy_4_0; + const auto jy_4_1 = jy_4_0 - Qdydt * Wy_4_1; + const auto jy_4_2 = jy_4_1 - Qdydt * Wy_4_2; + const auto jy_4_3 = jy_4_2 - Qdydt * Wy_4_3; + + /* + Current update + */ + auto J_acc = J.access(); + + /* + x - component + */ + J_acc(ix_min, iy_min, cur::jx1) += jx_0_0; + J_acc(ix_min, 
iy_min + 1, cur::jx1) += jx_0_1; + J_acc(ix_min, iy_min + 2, cur::jx1) += jx_0_2; + J_acc(ix_min, iy_min + 3, cur::jx1) += jx_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx1) += jx_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx1) += jx_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx1) += jx_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx1) += jx_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx1) += jx_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx1) += jx_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx1) += jx_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx1) += jx_2_3; + + if (update_x3) { + J_acc(ix_min + 3, iy_min, cur::jx1) += jx_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx1) += jx_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx1) += jx_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx1) += jx_3_3; + } + + if (update_y3) { + J_acc(ix_min, iy_min + 4, cur::jx1) += jx_0_4; + J_acc(ix_min + 1, iy_min + 4, cur::jx1) += jx_1_4; + J_acc(ix_min + 2, iy_min + 4, cur::jx1) += jx_2_4; + } + + if (update_x3 && update_y3) { + J_acc(ix_min + 3, iy_min + 4, cur::jx1) += jx_3_4; + } + + /* + y - component + */ + J_acc(ix_min, iy_min, cur::jx2) += jy_0_0; + J_acc(ix_min + 1, iy_min, cur::jx2) += jy_1_0; + J_acc(ix_min + 2, iy_min, cur::jx2) += jy_2_0; + J_acc(ix_min + 3, iy_min, cur::jx2) += jy_3_0; + + J_acc(ix_min, iy_min + 1, cur::jx2) += jy_0_1; + J_acc(ix_min + 1, iy_min + 1, cur::jx2) += jy_1_1; + J_acc(ix_min + 2, iy_min + 1, cur::jx2) += jy_2_1; + J_acc(ix_min + 3, iy_min + 1, cur::jx2) += jy_3_1; + + J_acc(ix_min, iy_min + 2, cur::jx2) += jy_0_2; + J_acc(ix_min + 1, iy_min + 2, cur::jx2) += jy_1_2; + J_acc(ix_min + 2, iy_min + 2, cur::jx2) += jy_2_2; + J_acc(ix_min + 3, iy_min + 2, cur::jx2) += jy_3_2; + + if (update_x3) { + J_acc(ix_min + 4, iy_min, cur::jx2) += jy_4_0; + J_acc(ix_min + 4, iy_min + 1, cur::jx2) += jy_4_1; + J_acc(ix_min + 4, iy_min + 2, cur::jx2) += jy_4_2; + } + + if (update_y3) { + J_acc(ix_min, iy_min + 3, cur::jx2) += jy_0_3; + J_acc(ix_min + 1, iy_min + 3, cur::jx2) += jy_1_3; + 
J_acc(ix_min + 2, iy_min + 3, cur::jx2) += jy_2_3; + J_acc(ix_min + 3, iy_min + 3, cur::jx2) += jy_3_3; + } + + if (update_x3 && update_y3) { + J_acc(ix_min + 4, iy_min + 3, cur::jx2) += jy_4_3; + } + /* + z - component, simulated direction + */ + J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; + J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; + J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; + J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; + + J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; + J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; + J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; + J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; + + J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; + J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; + J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; + J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; + + J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; + J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; + J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; + J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; + + if (update_x3) { + J_acc(ix_min + 4, iy_min, cur::jx3) += QVz * Wz_4_0; + J_acc(ix_min + 4, iy_min + 1, cur::jx3) += QVz * Wz_4_1; + J_acc(ix_min + 4, iy_min + 2, cur::jx3) += QVz * Wz_4_2; + J_acc(ix_min + 4, iy_min + 3, cur::jx3) += QVz * Wz_4_3; + } + + if (update_y3) { + J_acc(ix_min, iy_min + 4, cur::jx3) += QVz * Wz_0_4; + J_acc(ix_min + 1, iy_min + 4, cur::jx3) += QVz * Wz_1_4; + J_acc(ix_min + 2, iy_min + 4, cur::jx3) += QVz * Wz_2_4; + J_acc(ix_min + 3, iy_min + 4, cur::jx3) += QVz * Wz_3_4; + } + if (update_x3 && update_y3) { + J_acc(ix_min + 4, iy_min + 4, cur::jx3) += QVz * Wz_4_4; + } + } // dim } else { // order raise::KernelError(HERE, "Unsupported interpolation order"); } From bd2f333295ddc56e9a0cf52efb9058799304e254 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 22 May 2025 
22:27:22 -0500 Subject: [PATCH 030/154] update 3D 2nd order deposit --- src/kernels/currents_deposit.hpp | 2000 ++++++++++++++++-------------- 1 file changed, 1046 insertions(+), 954 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index fc87a1b01..0a024f713 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -892,964 +892,1056 @@ namespace kernel { } } else if constexpr (D == Dim::_3D) { - // /* - // y - direction - // */ - // - // // shape function at previous timestep - // real_t S0y_0, S0y_1, S0y_2, S0y_3; - // // shape function at current timestep - // real_t S1y_0, S1y_1, S1y_2, S1y_3; - // // indices of the shape function - // uint iy_min; - // // find indices and define shape function - // shape_function_2nd(S0y_0, - // S0y_1, - // S0y_2, - // S0y_3, - // S1y_0, - // S1y_1, - // S1y_2, - // S1y_3, - // iy_min, - // i2(p), - // dx2(p), - // i2_prev(p), - // dx2_prev(p)); - // - // /* - // z - direction - // */ - // - // // shape function at previous timestep - // real_t S0z_0, S0z_1, S0z_2, S0z_3; - // // shape function at current timestep - // real_t S1z_0, S1z_1, S1z_2, S1z_3; - // // indices of the shape function - // uint iz_min; - // // find indices and define shape function - // shape_function_2nd(S0z_0, - // S0z_1, - // S0z_2, - // S0z_3, - // S1z_0, - // S1z_1, - // S1z_2, - // S1z_3, - // iz_min, - // i3(p), - // dx3(p), - // i3_prev(p), - // dx3_prev(p)); - // - // // Calculate weight function - // // for (int i = 0; i < interp_order + 2; ++i) { - // // for (int j = 0; j < interp_order + 2; ++j) { - // // for (int k = 0; k < interp_order + 2; ++k) { - // // // Esirkepov 2001, Eq. 
31 - // // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * - // // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + - // // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); - // // - // // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * - // // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + - // // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); - // // - // // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * - // // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + - // // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); - // // } - // // } - // // } - // // - // // Unrolled calculations for Wx, Wy, and Wz - // const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - // const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - // const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - // const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - // - // const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - // const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - // const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - // const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - // - // const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - // const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - // const auto Wx_0_2_2 = THIRD * (S1x_0 - 
S0x_0) * - // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - // const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - // - // const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - // const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - // const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - // const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - // - // const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - // const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - // const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - // const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - // - // const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - // const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - // const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - // const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - // - // const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_0 + 
S1y_2 * S1z_0) + - // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - // const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - // const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - // const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - // - // const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - // const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - // const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - // const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - // - // const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - // const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - // const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - // const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - // - // const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - // const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - // const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - // HALF * (S0z_2 * 
S1y_1 + S0y_1 * S1z_2)); - // const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - // - // const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - // const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - // const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - // const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - // - // const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - // const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - // const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - // const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - // - // const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - // const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - // const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - // const auto Wx_3_0_3 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - // - // const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - // 
const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - // const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - // const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - // - // const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - // const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - // const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - // const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - // - // const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - // const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - // const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - // const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * - // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - // - // const real_t Qdxdt = coeff * inv_dt * dxp_r_1; - // - // J_acc(ix_min, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_0_1; - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_0_2; - // J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_0_3; - // // - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, 
cur::jx1) += Qdxdt * Wx_0_1_1; - // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_1_2; - // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_1_3; - // // - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_2_1; - // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_2_2; - // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_2_3; - // // - // J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_0_3_0; - // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_0_3_1; - // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_0_3_2; - // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_0_3_3; - // // - // // - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_0_1; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_0_2; - // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_0_3; - // // - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_1_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_1_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_1_3; - // // - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_2_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_2_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_1_2_3; - // // - // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_1_3_0; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_1_3_1; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_1_3_2; - // J_acc(ix_min + 1, iy_min + 3, iz_min 
+ 3, cur::jx1) += Qdxdt * Wx_1_3_3; - // // - // // - // J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_0_1; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_0_2; - // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_0_3; - // // - // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_1_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_1_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_1_3; - // // - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_2_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_2_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_2_3; - // // - // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_2_3_0; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_2_3_1; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_2_3_2; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_2_3_3; - // // - // // - // J_acc(ix_min + 3, iy_min, iz_min, cur::jx1) += Qdxdt * Wx_3_0_0; - // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_0_1; - // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_0_2; - // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_0_3; - // // - // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx1) += Qdxdt * Wx_3_1_0; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_1_1; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_1_2; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_1_3; - // // - // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx1) += Qdxdt * Wx_3_2_0; - 
// J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_2_1; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_2_2; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_2_3; - // // - // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx1) += Qdxdt * Wx_3_3_0; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx1) += Qdxdt * Wx_3_3_1; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx1) += Qdxdt * Wx_3_3_2; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx1) += Qdxdt * Wx_3_3_3; - // - // /* - // y-component - // */ - // // i = 0 - // const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - // const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - // const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - // const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - // - // const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - // const auto Wy_0_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - // const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - // const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - // - // const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - // const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - // const auto 
Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - // const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - // - // const auto Wy_0_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - // const auto Wy_0_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - // const auto Wy_0_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - // const auto Wy_0_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - // - // const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - // const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - // const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - // const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - // - // const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - // const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - // const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - // const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - // - // const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_0 + 
S1x_1 * S1z_0 + - // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - // const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - // const auto Wy_1_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - // const auto Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - // - // const auto Wy_1_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - // const auto Wy_1_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - // const auto Wy_1_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - // const auto Wy_1_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - // - // const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // 
const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * 
S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const real_t Qdydt = coeff * inv_dt * dxp_r_2; - // - // J_acc(ix_min, iy_min, iz_min, cur::jx2) += Qdydt * Wy_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_0_0_1; - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_0_0_2; - // J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_0_0_3; - // // - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_0_1_1; - // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += 
Qdydt * Wy_0_1_2; - // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_0_1_3; - // // - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_0_2_1; - // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_0_2_2; - // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_0_2_3; - // // - // J_acc(ix_min, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_0_3_0; - // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_0_3_1; - // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_0_3_2; - // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_0_3_3; - // // - // // - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += Qdydt * Wy_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_1_0_1; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_1_0_2; - // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_1_0_3; - // // - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_1_1_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_1_1_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_1_1_3; - // // - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_1_2_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_1_2_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_1_2_3; - // // - // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_1_3_0; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_1_3_1; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_1_3_2; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_1_3_3; - // // - // // - // J_acc(ix_min + 2, iy_min, 
iz_min, cur::jx2) += Qdydt * Wy_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_2_0_1; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_2_0_2; - // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_2_0_3; - // // - // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_2_1_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_2_1_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_2_1_3; - // // - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_2_2_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_2_2_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_2_2_3; - // // - // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_2_3_0; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_2_3_1; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_2_3_2; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_2_3_3; - // // - // // - // J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += Qdydt * Wy_3_0_0; - // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += Qdydt * Wy_3_0_1; - // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += Qdydt * Wy_3_0_2; - // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += Qdydt * Wy_3_0_3; - // // - // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += Qdydt * Wy_3_1_0; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += Qdydt * Wy_3_1_1; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += Qdydt * Wy_3_1_2; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += Qdydt * Wy_3_1_3; - // // - // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += Qdydt * Wy_3_2_0; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += Qdydt * Wy_3_2_1; - // 
J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += Qdydt * Wy_3_2_2; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += Qdydt * Wy_3_2_3; - // // - // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx2) += Qdydt * Wy_3_3_0; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx2) += Qdydt * Wy_3_3_1; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx2) += Qdydt * Wy_3_3_2; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx2) += Qdydt * Wy_3_3_3; - // - // /* - // z - component - // */ - // const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // - // const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // - // const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * 
S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // - // const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // - // // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 - // const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // - // const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // - // const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // 
HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // - // const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // - // const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_1_3 
= THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 - // const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // - // const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // const 
auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // const auto Wz_3_1_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + /* + y - direction + */ + + // shape function at previous timestep + real_t S0y_0, S0y_1, S0y_2, S0y_3; + // shape function at current timestep + real_t S1y_0, S1y_1, S1y_2, S1y_3; + // indices of the shape function + ncells_t iy_min; + bool update_y2; + // find indices and define shape function + // clang-format off + shape_function_2nd(S0y_0, S0y_1, S0y_2, S0y_3, + S1y_0, S1y_1, S1y_2, S1y_3, + iy_min, update_y2, + i2(p), dx2(p), + i2_prev(p), dx2_prev(p)); + // clang-format on + + /* + z - direction + */ + + // shape function at previous timestep + real_t S0z_0, S0z_1, S0z_2, S0z_3; + // shape function at current timestep + real_t S1z_0, S1z_1, S1z_2, S1z_3; + // indices of the shape function + ncells_t iz_min; + bool update_z2; + // find indices and define shape function + // clang-format off + shape_function_2nd(S0z_0, S0z_1, S0z_2, S0z_3, + S1z_0, S1z_1, S1z_2, S1z_3, + iz_min, update_z2, + i3(p), dx3(p), + i3_prev(p), dx3_prev(p)); + // clang-format on + + // Calculate weight function + // for (int i = 0; i < interp_order + 2; ++i) { + // for (int j = 0; j < interp_order + 2; ++j) { + // for (int k = 0; k < interp_order + 2; ++k) { + // // Esirkepov 2001, Eq. 
31 + // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * + // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + + // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); // - // const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * + // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + + // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); // - // const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_3 = THIRD * (S1z_3 - S0z_3) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * + // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + + // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + // } + // } + // } // - // const real_t Qdzdt = coeff * inv_dt * dxp_r_3; + + // Unrolled calculations for Wx, Wy, and Wz + // clang-format off + const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_2 + 
S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 
+ S0y_0 * S1z_1)); + const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_2_0_1 
= THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + + const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * 
S0z_0 + S1y_0 * S1z_0) + + HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + const auto Wx_3_0_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + + const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + + const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + + const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * + ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + HALF * (S0z_3 * 
S1y_3 + S0y_3 * S1z_3)); + + const real_t Qdxdt = coeff * inv_dt; + + const auto jx_0_0_0 = - Qdxdt * Wx_0_0_0; + const auto jx_1_0_0 = jx_0_0_0 - Qdxdt * Wx_1_0_0; + const auto jx_2_0_0 = jx_1_0_0 - Qdxdt * Wx_2_0_0; + const auto jx_0_1_0 = - Qdxdt * Wx_0_1_0; + const auto jx_1_1_0 = jx_0_1_0 - Qdxdt * Wx_1_1_0; + const auto jx_2_1_0 = jx_1_1_0 - Qdxdt * Wx_2_1_0; + const auto jx_0_2_0 = - Qdxdt * Wx_0_2_0; + const auto jx_1_2_0 = jx_0_2_0 - Qdxdt * Wx_1_2_0; + const auto jx_2_2_0 = jx_1_2_0 - Qdxdt * Wx_2_2_0; + const auto jx_0_3_0 = - Qdxdt * Wx_0_3_0; + const auto jx_1_3_0 = jx_0_3_0 - Qdxdt * Wx_1_3_0; + const auto jx_2_3_0 = jx_1_3_0 - Qdxdt * Wx_2_3_0; + + const auto jx_0_0_1 = - Qdxdt * Wx_0_0_1; + const auto jx_1_0_1 = jx_0_0_1 - Qdxdt * Wx_1_0_1; + const auto jx_2_0_1 = jx_1_0_1 - Qdxdt * Wx_2_0_1; + const auto jx_0_1_1 = - Qdxdt * Wx_0_1_1; + const auto jx_1_1_1 = jx_0_1_1 - Qdxdt * Wx_1_1_1; + const auto jx_2_1_1 = jx_1_1_1 - Qdxdt * Wx_2_1_1; + const auto jx_0_2_1 = - Qdxdt * Wx_0_2_1; + const auto jx_1_2_1 = jx_0_2_1 - Qdxdt * Wx_1_2_1; + const auto jx_2_2_1 = jx_1_2_1 - Qdxdt * Wx_2_2_1; + const auto jx_0_3_1 = - Qdxdt * Wx_0_3_1; + const auto jx_1_3_1 = jx_0_3_1 - Qdxdt * Wx_1_3_1; + const auto jx_2_3_1 = jx_1_3_1 - Qdxdt * Wx_2_3_1; + + const auto jx_0_0_2 = - Qdxdt * Wx_0_0_2; + const auto jx_1_0_2 = jx_0_0_2 - Qdxdt * Wx_1_0_2; + const auto jx_2_0_2 = jx_1_0_2 - Qdxdt * Wx_2_0_2; + const auto jx_0_1_2 = - Qdxdt * Wx_0_1_2; + const auto jx_1_1_2 = jx_0_1_2 - Qdxdt * Wx_1_1_2; + const auto jx_2_1_2 = jx_1_1_2 - Qdxdt * Wx_2_1_2; + const auto jx_0_2_2 = - Qdxdt * Wx_0_2_2; + const auto jx_1_2_2 = jx_0_2_2 - Qdxdt * Wx_1_2_2; + const auto jx_2_2_2 = jx_1_2_2 - Qdxdt * Wx_2_2_2; + const auto jx_0_3_2 = - Qdxdt * Wx_0_3_2; + const auto jx_1_3_2 = jx_0_3_2 - Qdxdt * Wx_1_3_2; + const auto jx_2_3_2 = jx_1_3_2 - Qdxdt * Wx_2_3_2; + + const auto jx_0_0_3 = - Qdxdt * Wx_0_0_3; + const auto jx_1_0_3 = jx_0_0_3 - Qdxdt * Wx_1_0_3; + const auto jx_2_0_3 = 
jx_1_0_3 - Qdxdt * Wx_2_0_3; + const auto jx_0_1_3 = - Qdxdt * Wx_0_1_3; + const auto jx_1_1_3 = jx_0_1_3 - Qdxdt * Wx_1_1_3; + const auto jx_2_1_3 = jx_1_1_3 - Qdxdt * Wx_2_1_3; + const auto jx_0_2_3 = - Qdxdt * Wx_0_2_3; + const auto jx_1_2_3 = jx_0_2_3 - Qdxdt * Wx_1_2_3; + const auto jx_2_2_3 = jx_1_2_3 - Qdxdt * Wx_2_2_3; + const auto jx_0_3_3 = - Qdxdt * Wx_0_3_3; + const auto jx_1_3_3 = jx_0_3_3 - Qdxdt * Wx_1_3_3; + const auto jx_2_3_3 = jx_1_3_3 - Qdxdt * Wx_2_3_3; + + /* + y-component + */ + const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * + (S0x_0 * S0z_3 + S1x_0 * S1z_3 
+ + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_0_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + const auto Wy_0_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + const auto Wy_0_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + const auto Wy_0_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + + const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_2_2 = THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto Wy_1_2_3 = 
THIRD * (S1y_2 - S0y_2) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_1_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + const auto Wy_1_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + const auto Wy_1_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + const auto Wy_1_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + + const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * 
(S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + + const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_2_2 = THIRD * 
(S1y_2 - S0y_2) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + const auto Wy_3_3_1 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * + (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + + const real_t Qdydt = coeff * inv_dt; + + const auto jy_0_0_0 = - Qdydt * Wy_0_0_0; + const auto jy_0_1_0 = jy_0_0_0 - Qdydt * Wy_0_1_0; + const auto jy_0_2_0 = jy_0_1_0 - Qdydt * Wy_0_2_0; + const auto jy_1_0_0 = - Qdydt * Wy_1_0_0; + const auto jy_1_1_0 = jy_1_0_0 - Qdydt * Wy_1_1_0; + const auto jy_1_2_0 = jy_1_1_0 - Qdydt * Wy_1_2_0; + const auto jy_2_0_0 = - Qdydt * Wy_2_0_0; + const auto jy_2_1_0 = jy_2_0_0 - Qdydt * Wy_2_1_0; + const auto jy_2_2_0 = jy_2_1_0 - Qdydt * Wy_2_2_0; + const auto jy_3_0_0 = - Qdydt * Wy_3_0_0; + const auto jy_3_1_0 = jy_3_0_0 - Qdydt * Wy_3_1_0; + const auto jy_3_2_0 = jy_3_1_0 - Qdydt * Wy_3_2_0; + + const auto jy_0_0_1 = - Qdydt * Wy_0_0_1; + const auto jy_0_1_1 = jy_0_0_1 - Qdydt * Wy_0_1_1; + const auto jy_0_2_1 = jy_0_1_1 - Qdydt * Wy_0_2_1; + const auto jy_1_0_1 = - Qdydt * Wy_1_0_1; + const auto jy_1_1_1 = jy_1_0_1 - Qdydt * Wy_1_1_1; + const auto jy_1_2_1 = jy_1_1_1 - Qdydt * Wy_1_2_1; + const auto jy_2_0_1 = - Qdydt * Wy_2_0_1; + const auto jy_2_1_1 = jy_2_0_1 - Qdydt * Wy_2_1_1; + const auto jy_2_2_1 = jy_2_1_1 - Qdydt * Wy_2_2_1; + const auto jy_3_0_1 = - Qdydt * Wy_3_0_1; + const auto jy_3_1_1 = jy_3_0_1 - Qdydt * Wy_3_1_1; + const auto jy_3_2_1 = jy_3_1_1 - Qdydt * Wy_3_2_1; + + const auto 
jy_0_0_2 = - Qdydt * Wy_0_0_2; + const auto jy_0_1_2 = jy_0_0_2 - Qdydt * Wy_0_1_2; + const auto jy_0_2_2 = jy_0_1_2 - Qdydt * Wy_0_2_2; + const auto jy_1_0_2 = - Qdydt * Wy_1_0_2; + const auto jy_1_1_2 = jy_1_0_2 - Qdydt * Wy_1_1_2; + const auto jy_1_2_2 = jy_1_1_2 - Qdydt * Wy_1_2_2; + const auto jy_2_0_2 = - Qdydt * Wy_2_0_2; + const auto jy_2_1_2 = jy_2_0_2 - Qdydt * Wy_2_1_2; + const auto jy_2_2_2 = jy_2_1_2 - Qdydt * Wy_2_2_2; + const auto jy_3_0_2 = - Qdydt * Wy_3_0_2; + const auto jy_3_1_2 = jy_3_0_2 - Qdydt * Wy_3_1_2; + const auto jy_3_2_2 = jy_3_1_2 - Qdydt * Wy_3_2_2; + + const auto jy_0_0_3 = - Qdydt * Wy_0_0_3; + const auto jy_0_1_3 = jy_0_0_3 - Qdydt * Wy_0_1_3; + const auto jy_0_2_3 = jy_0_1_3 - Qdydt * Wy_0_2_3; + const auto jy_1_0_3 = - Qdydt * Wy_1_0_3; + const auto jy_1_1_3 = jy_1_0_3 - Qdydt * Wy_1_1_3; + const auto jy_1_2_3 = jy_1_1_3 - Qdydt * Wy_1_2_3; + const auto jy_2_0_3 = - Qdydt * Wy_2_0_3; + const auto jy_2_1_3 = jy_2_0_3 - Qdydt * Wy_2_1_3; + const auto jy_2_2_3 = jy_2_1_3 - Qdydt * Wy_2_2_3; + const auto jy_3_0_3 = - Qdydt * Wy_3_0_3; + const auto jy_3_1_3 = jy_3_0_3 - Qdydt * Wy_3_1_3; + const auto jy_3_2_3 = jy_3_1_3 - Qdydt * Wy_3_2_3; + + /* + z - component + */ + const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + + const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + + const 
auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + + const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + + // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 + const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + + const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + + const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + + 
const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + + // Unrolled loop for Wz[i][j][k] with i = 2 and interp_order + 2 = 4 + const auto Wz_2_0_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + const auto Wz_2_0_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + const auto Wz_2_0_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + + const auto Wz_2_1_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + const auto Wz_2_1_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + const auto Wz_2_1_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + + const auto Wz_2_2_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + const auto Wz_2_2_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + const auto Wz_2_2_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + + const auto Wz_2_3_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + const auto Wz_2_3_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + const auto Wz_2_3_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + 
+ // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 + const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + + const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + + const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + + const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * + (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * + (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * + (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + + const real_t Qdzdt = coeff * inv_dt; + + const auto jz_0_0_0 = - Qdzdt * Wz_0_0_0; + const auto jz_0_0_1 = jz_0_0_0 - Qdzdt * Wz_0_0_1; + const auto jz_0_0_2 = jz_0_0_1 - Qdzdt * Wz_0_0_2; + const auto jz_0_1_0 = - Qdzdt * Wz_0_1_0; + const auto jz_0_1_1 = jz_0_1_0 - Qdzdt * Wz_0_1_1; + const auto jz_0_1_2 = jz_0_1_1 - Qdzdt * Wz_0_1_2; + const auto jz_0_2_0 = - Qdzdt * Wz_0_2_0; 
+ const auto jz_0_2_1 = jz_0_2_0 - Qdzdt * Wz_0_2_1; + const auto jz_0_2_2 = jz_0_2_1 - Qdzdt * Wz_0_2_2; + const auto jz_0_3_0 = - Qdzdt * Wz_0_3_0; + const auto jz_0_3_1 = jz_0_3_0 - Qdzdt * Wz_0_3_1; + const auto jz_0_3_2 = jz_0_3_1 - Qdzdt * Wz_0_3_2; + + const auto jz_1_0_0 = - Qdzdt * Wz_1_0_0; + const auto jz_1_0_1 = jz_1_0_0 - Qdzdt * Wz_1_0_1; + const auto jz_1_0_2 = jz_1_0_1 - Qdzdt * Wz_1_0_2; + const auto jz_1_1_0 = - Qdzdt * Wz_1_1_0; + const auto jz_1_1_1 = jz_1_1_0 - Qdzdt * Wz_1_1_1; + const auto jz_1_1_2 = jz_1_1_1 - Qdzdt * Wz_1_1_2; + const auto jz_1_2_0 = - Qdzdt * Wz_1_2_0; + const auto jz_1_2_1 = jz_1_2_0 - Qdzdt * Wz_1_2_1; + const auto jz_1_2_2 = jz_1_2_1 - Qdzdt * Wz_1_2_2; + const auto jz_1_3_0 = - Qdzdt * Wz_1_3_0; + const auto jz_1_3_1 = jz_1_3_0 - Qdzdt * Wz_1_3_1; + const auto jz_1_3_2 = jz_1_3_1 - Qdzdt * Wz_1_3_2; + + const auto jz_2_0_0 = - Qdzdt * Wz_2_0_0; + const auto jz_2_0_1 = jz_2_0_0 - Qdzdt * Wz_2_0_1; + const auto jz_2_0_2 = jz_2_0_1 - Qdzdt * Wz_2_0_2; + const auto jz_2_1_0 = - Qdzdt * Wz_2_1_0; + const auto jz_2_1_1 = jz_2_1_0 - Qdzdt * Wz_2_1_1; + const auto jz_2_1_2 = jz_2_1_1 - Qdzdt * Wz_2_1_2; + const auto jz_2_2_0 = - Qdzdt * Wz_2_2_0; + const auto jz_2_2_1 = jz_2_2_0 - Qdzdt * Wz_2_2_1; + const auto jz_2_2_2 = jz_2_2_1 - Qdzdt * Wz_2_2_2; + const auto jz_2_3_0 = - Qdzdt * Wz_2_3_0; + const auto jz_2_3_1 = jz_2_3_0 - Qdzdt * Wz_2_3_1; + const auto jz_2_3_2 = jz_2_3_1 - Qdzdt * Wz_2_3_2; + + const auto jz_3_0_0 = - Qdzdt * Wz_3_0_0; + const auto jz_3_0_1 = jz_3_0_0 - Qdzdt * Wz_3_0_1; + const auto jz_3_0_2 = jz_3_0_1 - Qdzdt * Wz_3_0_2; + const auto jz_3_1_0 = - Qdzdt * Wz_3_1_0; + const auto jz_3_1_1 = jz_3_1_0 - Qdzdt * Wz_3_1_1; + const auto jz_3_1_2 = jz_3_1_1 - Qdzdt * Wz_3_1_2; + const auto jz_3_2_0 = - Qdzdt * Wz_3_2_0; + const auto jz_3_2_1 = jz_3_2_0 - Qdzdt * Wz_3_2_1; + const auto jz_3_2_2 = jz_3_2_1 - Qdzdt * Wz_3_2_2; + const auto jz_3_3_0 = - Qdzdt * Wz_3_3_0; + const auto jz_3_3_1 = jz_3_3_0 - Qdzdt * 
Wz_3_3_1; + const auto jz_3_3_2 = jz_3_3_1 - Qdzdt * Wz_3_3_2; + + + /* + Current update + */ + auto J_acc = J.access(); + + J_acc(ix_min, iy_min, iz_min, cur::jx1) += jx_0_0_0; + J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += jx_0_0_1; + J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += jx_0_0_2; + J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += jx_0_1_0; + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += jx_0_1_1; + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += jx_0_1_2; + J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += jx_0_2_0; + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += jx_0_2_1; + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += jx_0_2_2; + J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += jx_1_0_0; + J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += jx_1_0_1; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += jx_1_0_2; + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += jx_1_1_0; + J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += jx_1_1_1; + J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += jx_1_1_2; + J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += jx_1_2_0; + J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += jx_1_2_1; + J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += jx_1_2_2; + + if (update_x2) + { + J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += jx_2_0_0; + J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += jx_2_0_1; + J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += jx_2_0_2; + J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += jx_2_1_0; + J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += jx_2_1_1; + J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += jx_2_1_2; + J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += jx_2_2_0; + J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += jx_2_2_1; + J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += jx_2_2_2; + + if (update_y2) + { + J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += jx_2_3_0; + J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += jx_2_3_1; 
+ J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += jx_2_3_2; + } + + if (update_z2) + { + J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += jx_2_0_3; + J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += jx_2_1_3; + J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += jx_2_2_3; + + if (update_y2) + { + J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += jx_2_3_3; + } + } + } // - // J_acc(ix_min, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_0_1; - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_0_2; - // J_acc(ix_min, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_0_3; - // // - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_1_1; - // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_1_2; - // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_1_3; - // // - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_2_1; - // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_2_2; - // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_2_3; - // // - // J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_0_3_0; - // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_0_3_1; - // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_0_3_2; - // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_0_3_3; - // // - // // - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_0_1; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_0_2; - // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_0_3; - // // - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, 
iz_min + 1, cur::jx3) += Qdzdt * Wz_1_1_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_1_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_1_3; - // // - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_2_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_2_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_2_3; - // // - // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_1_3_0; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_1_3_1; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_1_3_2; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_1_3_3; - // // - // // - // J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_0_1; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_0_2; - // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_0_3; - // // - // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_1_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_1_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_1_3; - // // - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_2_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_2_2_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_2_3; - // // - // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_2_3_0; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_2_3_1; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * 
Wz_2_3_2; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_2_3_3; - // // - // // - // J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += Qdzdt * Wz_3_0_0; - // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_0_1; - // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_0_2; - // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_0_3; - // // - // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += Qdzdt * Wz_3_1_0; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_1_1; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_1_2; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_1_3; - // // - // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += Qdzdt * Wz_3_2_0; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_2_1; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_2_2; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_2_3; - // // - // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += Qdzdt * Wz_3_3_0; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += Qdzdt * Wz_3_3_1; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += Qdzdt * Wz_3_3_2; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 3, cur::jx3) += Qdzdt * Wz_3_3_3; + if (update_y2) + { + J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += jx_0_3_0; + J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += jx_0_3_1; + J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += jx_0_3_2; + J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += jx_1_3_0; + J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += jx_1_3_1; + J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += jx_1_3_2; + } + + if (update_z2) + { + J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += jx_0_0_3; + J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += jx_0_1_3; + J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += jx_0_2_3; + J_acc(ix_min + 1, iy_min, iz_min + 3, 
cur::jx1) += jx_1_0_3; + J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += jx_1_1_3; + J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += jx_1_2_3; + + if (update_y2) + { + J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += jx_0_3_3; + J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += jx_1_3_3; + } + } + + + /* + y-component + */ + J_acc(ix_min, iy_min, iz_min, cur::jx2) += jy_0_0_0; + J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += jy_0_0_1; + J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += jy_0_0_2; + J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += jy_0_1_0; + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += jy_0_1_1; + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += jy_0_1_2; + J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += jy_1_0_0; + J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += jy_1_0_1; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += jy_1_0_2; + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += jy_1_1_0; + J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += jy_1_1_1; + J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += jy_1_1_2; + J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += jy_2_0_0; + J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += jy_2_0_1; + J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += jy_2_0_2; + J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += jy_2_1_0; + J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += jy_2_1_1; + J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += jy_2_1_2; + + if (update_x2) + { + J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += jy_3_0_0; + J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += jy_3_0_1; + J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += jy_3_0_2; + J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += jy_3_1_0; + J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += jy_3_1_1; + J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += jy_3_1_2; + + if (update_z2) + { + J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += jy_3_0_3; + J_acc(ix_min + 3, iy_min + 1, iz_min + 
3, cur::jx2) += jy_3_1_3; + } + } + + if (update_y2) + { + J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += jy_0_2_0; + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += jy_0_2_1; + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += jy_0_2_2; + J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += jy_1_2_0; + J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += jy_1_2_1; + J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += jy_1_2_2; + J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += jy_2_2_0; + J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += jy_2_2_1; + J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += jy_2_2_2; + + if (update_x2) + { + J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += jy_3_2_0; + J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += jy_3_2_1; + J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += jy_3_2_2; + + if (update_z2) + { + J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += jy_2_2_3; + J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += jy_3_2_3; + } + } + + if (update_z2) + { + J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += jy_0_2_3; + J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += jy_1_2_3; + } + } + + if (update_z2) + { + J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += jy_0_0_3; + J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += jy_0_1_3; + J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += jy_1_0_3; + J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += jy_1_1_3; + J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += jy_2_0_3; + J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += jy_2_1_3; + } + + /* + z-component + */ + J_acc(ix_min, iy_min, iz_min, cur::jx3) += jz_0_0_0; + J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += jz_0_0_1; + J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += jz_0_1_0; + J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += jz_0_1_1; + J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += jz_0_2_0; + J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += jz_0_2_1; + J_acc(ix_min + 1, 
iy_min, iz_min, cur::jx3) += jz_1_0_0; + J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += jz_1_0_1; + J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += jz_1_1_0; + J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += jz_1_1_1; + J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += jz_1_2_0; + J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += jz_1_2_1; + J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += jz_2_0_0; + J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += jz_2_0_1; + J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += jz_2_1_0; + J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += jz_2_1_1; + J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += jz_2_2_0; + J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += jz_2_2_1; + + if (update_x2) + { + J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += jz_3_0_0; + J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += jz_3_0_1; + J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += jz_3_1_0; + J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += jz_3_1_1; + J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += jz_3_2_0; + J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += jz_3_2_1; + J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += jz_3_3_0; + J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += jz_3_3_1; + } + + if (update_y2) + { + J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += jz_0_3_0; + J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += jz_0_3_1; + J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += jz_1_3_0; + J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += jz_1_3_1; + J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += jz_2_3_0; + J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += jz_2_3_1; + } + + if (update_z2) + { + J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += jz_0_0_2; + J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += jz_0_1_2; + J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += jz_0_2_2; + J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += jz_1_0_2; + J_acc(ix_min + 1, iy_min + 1, 
iz_min + 2, cur::jx3) += jz_1_1_2; + J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += jz_1_2_2; + J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += jz_2_0_2; + J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += jz_2_1_2; + J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += jz_2_2_2; + + if (update_x2) + { + J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += jz_3_0_2; + J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += jz_3_1_2; + J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += jz_3_2_2; + + if (update_y2) + { + J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += jz_3_3_2; + } + } + + if (update_y2) + { + J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += jz_0_3_2; + J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += jz_1_3_2; + J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += jz_2_3_2; + } + } + // clang-format on } // dimension } else if constexpr (O == 3u) { From 7e52c03a44dd0bbdea196bb6ebea0184621e14b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 22 May 2025 22:31:05 -0500 Subject: [PATCH 031/154] removed redundant terms --- src/kernels/currents_deposit.hpp | 106 +------------------------------ 1 file changed, 1 insertion(+), 105 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 0a024f713..257e3e8f6 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -1110,58 +1110,6 @@ namespace kernel { const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_3_0_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_3_0_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_3_0_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto 
Wx_3_0_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_3_1_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_3_1_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_3_1_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_3_1_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_3_2_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_3_2_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_3_2_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_3_2_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_3_3_0 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_3_3_1 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_3_3_2 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_3_3_3 = THIRD * (S1x_3 - S0x_3) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); const real_t Qdxdt = coeff * inv_dt; @@ -1259,19 +1207,6 @@ namespace kernel { (S0x_0 * S0z_3 + S1x_0 * S1z_3 + HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - const auto Wy_0_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_3_1 = THIRD * 
(S1y_3 - S0y_3) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * (S0x_1 * S0z_0 + S1x_1 * S1z_0 + HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); @@ -1310,20 +1245,7 @@ namespace kernel { const auto Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * (S0x_1 * S0z_3 + S1x_1 * S1z_3 + HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_1_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - + const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * (S0x_2 * S0z_0 + S1x_2 * S1z_0 + HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); @@ -1363,19 +1285,6 @@ namespace kernel { (S0x_2 * S0z_3 + S1x_2 * S1z_3 + HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - const auto Wy_2_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * (S0x_3 * S0z_0 + S1x_3 * S1z_0 + HALF * 
(S0z_0 * S1x_3 + S0x_3 * S1z_0)); @@ -1415,19 +1324,6 @@ namespace kernel { (S0x_3 * S0z_3 + S1x_3 * S1z_3 + HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - const auto Wy_3_3_0 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_3_1 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_3_2 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_3_3 = THIRD * (S1y_3 - S0y_3) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - const real_t Qdydt = coeff * inv_dt; const auto jy_0_0_0 = - Qdydt * Wy_0_0_0; From f1684262b0291663245092e6a3cd7b760f14df5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Fri, 23 May 2025 15:08:14 -0500 Subject: [PATCH 032/154] 2nd order particle pusher --- src/kernels/particle_pusher_sr.hpp | 485 +++++++++++++++++++++++++++++ 1 file changed, 485 insertions(+) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 831d070ec..2d89f5875 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -476,6 +476,9 @@ namespace kernel::sr { bool is_gca { false }; getInterpFlds(p, ei, bi); + // ToDo: Better way to call this + //getInterpFlds2nd(p, ei, bi); + metric.template transform_xyz(xp_Cd, ei, ei_Cart); metric.template transform_xyz(xp_Cd, bi, bi_Cart); if (cooling != 0) { @@ -1090,6 +1093,488 @@ namespace kernel::sr { } } + Inline void getInterpFlds2nd(index_t& p, + vec_t& e0, + vec_t& b0) const { + if constexpr (D == Dim::_1D) { + const int i { i1(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + + // Compute weights for second-order interpolation + real_t w0 = HALF * SQR(HALF - dx1_); + real_t w1 = static_cast(0.75) - SQR(dx1_); + real_t w2 = HALF * SQR(HALF + dx1_); + + // Ex1 (dual grid) + real_t c0 
= EB(i - 1, em::ex1); // First grid point + real_t c1 = EB(i, em::ex1); // Second grid point + real_t c2 = EB(i + 1, em::ex1); // Third grid point + e0[0] = c0 * w0 + c1 * w1 + c2 * w2; + + // Ex2 (primal grid) + c0 = EB(i - 1, em::ex2); // First grid point + c1 = EB(i, em::ex2); // Second grid point + c2 = EB(i + 1, em::ex2); // Third grid point + e0[1] = c0 * w0 + c1 * w1 + c2 * w2; + + // Ex3 (primal grid) + c0 = EB(i - 1, em::ex3); // First grid point + c1 = EB(i, em::ex3); // Second grid point + c2 = EB(i + 1, em::ex3); // Third grid point + e0[2] = c0 * w0 + c1 * w1 + c2 * w2; + + // Bx1 (primal grid) + c0 = EB(i - 1, em::bx1); // First grid point + c1 = EB(i, em::bx1); // Second grid point + c2 = EB(i + 1, em::bx1); // Third grid point + b0[0] = c0 * w0 + c1 * w1 + c2 * w2; + + // Bx2 (dual grid) + c0 = EB(i - 2, em::bx2); // First grid point + c1 = EB(i - 1, em::bx2); // Second grid point + c2 = EB(i, em::bx2); // Third grid point + b0[1] = c0 * w0 + c1 * w1 + c2 * w2; + + // Bx3 (dual grid) + c0 = EB(i - 2, em::bx3); // First grid point + c1 = EB(i - 1, em::bx3); // Second grid point + c2 = EB(i, em::bx3); // Third grid point + b0[2] = c0 * w0 + c1 * w1 + c2 * w2; + + } else if constexpr (D == Dim::_2D) { + const int i { i1(p) + static_cast(N_GHOSTS) }; + const int j { i2(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + const auto dx2_ { static_cast(dx2(p)) }; + + // Compute weights for second-order interpolation + real_t w0x = HALF * SQR(HALF - dx1_); + real_t w1x = static_cast(0.75) - SQR(dx1_); + real_t w2x = HALF * SQR(HALF + dx1_); + + real_t w0y = HALF * SQR(HALF - dx2_); + real_t w1y = static_cast(0.75) - SQR(dx2_); + real_t w2y = HALF * SQR(HALF + dx2_); + + // Ex1 + // Interpolate --- (dual, primal) + real_t c000 = EB(i - 1, j - 1, em::ex1); + real_t c100 = EB(i, j - 1, em::ex1); + real_t c200 = EB(i + 1, j - 1, em::ex1); + real_t c010 = EB(i - 1, j, em::ex1); + real_t c110 = EB(i, j, em::ex1); + real_t c210 = EB(i + 1, 
j, em::ex1); + real_t c020 = EB(i - 1, j + 1, em::ex1); + real_t c120 = EB(i, j + 1, em::ex1); + real_t c220 = EB(i + 1, j + 1, em::ex1); + + real_t c0 = c000 * w0x + c100 * w1x + c200 * w2x; + real_t c1 = c010 * w0x + c110 * w1x + c210 * w2x; + real_t c2 = c020 * w0x + c120 * w1x + c220 * w2x; + e0[0] = c0 * w0y + c1 * w1y + c2 * w2y; + + // Ex2 + // Interpolate --- (primal, dual) + c000 = EB(i - 1, j - 1, em::ex2); + c100 = EB(i, j - 1, em::ex2); + c200 = EB(i + 1, j - 1, em::ex2); + c010 = EB(i - 1, j, em::ex2); + c110 = EB(i, j, em::ex2); + c210 = EB(i + 1, j, em::ex2); + c020 = EB(i - 1, j + 1, em::ex2); + c120 = EB(i, j + 1, em::ex2); + c220 = EB(i + 1, j + 1, em::ex2); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + e0[1] = c0 * w0y + c1 * w1y + c2 * w2y; + + // Ex3 + // Interpolate --- (primal, primal) + c000 = EB(i - 1, j - 1, em::ex3); + c100 = EB(i, j - 1, em::ex3); + c200 = EB(i + 1, j - 1, em::ex3); + c010 = EB(i - 1, j, em::ex3); + c110 = EB(i, j, em::ex3); + c210 = EB(i + 1, j, em::ex3); + c020 = EB(i - 1, j + 1, em::ex3); + c120 = EB(i, j + 1, em::ex3); + c220 = EB(i + 1, j + 1, em::ex3); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + e0[2] = c0 * w0y + c1 * w1y + c2 * w2y; + + // Bx1 + // Interpolate --- (primal, dual) + c000 = EB(i - 1, j - 1, em::bx1); + c100 = EB(i, j - 1, em::bx1); + c200 = EB(i + 1, j - 1, em::bx1); + c010 = EB(i - 1, j, em::bx1); + c110 = EB(i, j, em::bx1); + c210 = EB(i + 1, j, em::bx1); + c020 = EB(i - 1, j + 1, em::bx1); + c120 = EB(i, j + 1, em::bx1); + c220 = EB(i + 1, j + 1, em::bx1); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + b0[0] = c0 * w0y + c1 * w1y + c2 * w2y; + + // Bx2 + // Interpolate --- (dual, primal) + c000 = EB(i - 1, j - 1, 
em::bx2); + c100 = EB(i, j - 1, em::bx2); + c200 = EB(i + 1, j - 1, em::bx2); + c010 = EB(i - 1, j, em::bx2); + c110 = EB(i, j, em::bx2); + c210 = EB(i + 1, j, em::bx2); + c020 = EB(i - 1, j + 1, em::bx2); + c120 = EB(i, j + 1, em::bx2); + c220 = EB(i + 1, j + 1, em::bx2); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + b0[1] = c0 * w0y + c1 * w1y + c2 * w2y; + + // Bx3 + // Interpolate --- (dual, dual) + c000 = EB(i - 1, j - 1, em::bx3); + c100 = EB(i, j - 1, em::bx3); + c200 = EB(i + 1, j - 1, em::bx3); + c010 = EB(i - 1, j, em::bx3); + c110 = EB(i, j, em::bx3); + c210 = EB(i + 1, j, em::bx3); + c020 = EB(i - 1, j + 1, em::bx3); + c120 = EB(i, j + 1, em::bx3); + c220 = EB(i + 1, j + 1, em::bx3); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + b0[2] = c0 * w0y + c1 * w1y + c2 * w2y; + + } else if constexpr (D == Dim::_3D) { + const int i { i1(p) + static_cast(N_GHOSTS) }; + const int j { i2(p) + static_cast(N_GHOSTS) }; + const int k { i3(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + const auto dx2_ { static_cast(dx2(p)) }; + const auto dx3_ { static_cast(dx3(p)) }; + + // Compute weights for second-order interpolation + real_t w0x = HALF * SQR(HALF - dx1_); + real_t w1x = static_cast(0.75) - SQR(dx1_); + real_t w2x = HALF * SQR(HALF + dx1_); + + real_t w0y = HALF * SQR(HALF - dx2_); + real_t w1y = static_cast(0.75) - SQR(dx2_); + real_t w2y = HALF * SQR(HALF + dx2_); + + real_t w0z = HALF * SQR(HALF - dx3_); + real_t w1z = static_cast(0.75) - SQR(dx3_); + real_t w2z = HALF * SQR(HALF + dx3_); + + // Ex1 + // Interpolate --- (dual, primal, primal) + real_t c000 = EB(i - 1, j - 1, k - 1, em::ex1); + real_t c100 = EB(i, j - 1, k - 1, em::ex1); + real_t c200 = EB(i + 1, j - 1, k - 1, em::ex1); + real_t c010 = EB(i - 1, j, k - 1, em::ex1); + real_t c110 = EB(i, 
j, k - 1, em::ex1); + real_t c210 = EB(i + 1, j, k - 1, em::ex1); + real_t c020 = EB(i - 1, j + 1, k - 1, em::ex1); + real_t c120 = EB(i, j + 1, k - 1, em::ex1); + real_t c220 = EB(i + 1, j + 1, k - 1, em::ex1); + + real_t c001 = EB(i - 1, j - 1, k, em::ex1); + real_t c101 = EB(i, j - 1, k, em::ex1); + real_t c201 = EB(i + 1, j - 1, k, em::ex1); + real_t c011 = EB(i - 1, j, k, em::ex1); + real_t c111 = EB(i, j, k, em::ex1); + real_t c211 = EB(i + 1, j, k, em::ex1); + real_t c021 = EB(i - 1, j + 1, k, em::ex1); + real_t c121 = EB(i, j + 1, k, em::ex1); + real_t c221 = EB(i + 1, j + 1, k, em::ex1); + + real_t c002 = EB(i - 1, j - 1, k + 1, em::ex1); + real_t c102 = EB(i, j - 1, k + 1, em::ex1); + real_t c202 = EB(i + 1, j - 1, k + 1, em::ex1); + real_t c012 = EB(i - 1, j, k + 1, em::ex1); + real_t c112 = EB(i, j, k + 1, em::ex1); + real_t c212 = EB(i + 1, j, k + 1, em::ex1); + real_t c022 = EB(i - 1, j + 1, k + 1, em::ex1); + real_t c122 = EB(i, j + 1, k + 1, em::ex1); + real_t c222 = EB(i + 1, j + 1, k + 1, em::ex1); + + real_t c0 = c000 * w0x + c100 * w1x + c200 * w2x; + real_t c1 = c010 * w0x + c110 * w1x + c210 * w2x; + real_t c2 = c020 * w0x + c120 * w1x + c220 * w2x; + real_t c00 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c001 * w0x + c101 * w1x + c201 * w2x; + c1 = c011 * w0x + c111 * w1x + c211 * w2x; + c2 = c021 * w0x + c121 * w1x + c221 * w2x; + real_t c01 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c002 * w0x + c102 * w1x + c202 * w2x; + c1 = c012 * w0x + c112 * w1x + c212 * w2x; + c2 = c022 * w0x + c122 * w1x + c222 * w2x; + real_t c02 = c0 * w0y + c1 * w1y + c2 * w2y; + + e0[0] = c00 * w0z + c01 * w1z + c02 * w2z; + + // Ex2 + // Interpolate -- (primal, dual, primal) + c000 = EB(i - 1, j - 1, k - 1, em::ex2); + c100 = EB(i, j - 1, k - 1, em::ex2); + c200 = EB(i + 1, j - 1, k - 1, em::ex2); + c010 = EB(i - 1, j, k - 1, em::ex2); + c110 = EB(i, j, k - 1, em::ex2); + c210 = EB(i + 1, j, k - 1, em::ex2); + c020 = EB(i - 1, j + 1, k - 1, em::ex2); + c120 = 
EB(i, j + 1, k - 1, em::ex2); + c220 = EB(i + 1, j + 1, k - 1, em::ex2); + + c001 = EB(i - 1, j - 1, k, em::ex2); + c101 = EB(i, j - 1, k, em::ex2); + c201 = EB(i + 1, j - 1, k, em::ex2); + c011 = EB(i - 1, j, k, em::ex2); + c111 = EB(i, j, k, em::ex2); + c211 = EB(i + 1, j, k, em::ex2); + c021 = EB(i - 1, j + 1, k, em::ex2); + c121 = EB(i, j + 1, k, em::ex2); + c221 = EB(i + 1, j + 1, k, em::ex2); + + c002 = EB(i - 1, j - 1, k + 1, em::ex2); + c102 = EB(i, j - 1, k + 1, em::ex2); + c202 = EB(i + 1, j - 1, k + 1, em::ex2); + c012 = EB(i - 1, j, k + 1, em::ex2); + c112 = EB(i, j, k + 1, em::ex2); + c212 = EB(i + 1, j, k + 1, em::ex2); + c022 = EB(i - 1, j + 1, k + 1, em::ex2); + c122 = EB(i, j + 1, k + 1, em::ex2); + c222 = EB(i + 1, j + 1, k + 1, em::ex2); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + c00 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c001 * w0x + c101 * w1x + c201 * w2x; + c1 = c011 * w0x + c111 * w1x + c211 * w2x; + c2 = c021 * w0x + c121 * w1x + c221 * w2x; + c01 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c002 * w0x + c102 * w1x + c202 * w2x; + c1 = c012 * w0x + c112 * w1x + c212 * w2x; + c2 = c022 * w0x + c122 * w1x + c222 * w2x; + c02 = c0 * w0y + c1 * w1y + c2 * w2y; + + e0[1] = c00 * w0z + c01 * w1z + c02 * w2z; + + // Ex3 + // Interpolate -- (primal, primal, dual) + c000 = EB(i - 1, j - 1, k - 1, em::ex3); + c100 = EB(i, j - 1, k - 1, em::ex3); + c200 = EB(i + 1, j - 1, k - 1, em::ex3); + c010 = EB(i - 1, j, k - 1, em::ex3); + c110 = EB(i, j, k - 1, em::ex3); + c210 = EB(i + 1, j, k - 1, em::ex3); + c020 = EB(i - 1, j + 1, k - 1, em::ex3); + c120 = EB(i, j + 1, k - 1, em::ex3); + c220 = EB(i + 1, j + 1, k - 1, em::ex3); + + c001 = EB(i - 1, j - 1, k, em::ex3); + c101 = EB(i, j - 1, k, em::ex3); + c201 = EB(i + 1, j - 1, k, em::ex3); + c011 = EB(i - 1, j, k, em::ex3); + c111 = EB(i, j, k, em::ex3); + c211 = EB(i + 1, j, k, em::ex3); + c021 = EB(i - 1, j 
+ 1, k, em::ex3); + c121 = EB(i, j + 1, k, em::ex3); + c221 = EB(i + 1, j + 1, k, em::ex3); + + c002 = EB(i - 1, j - 1, k + 1, em::ex3); + c102 = EB(i, j - 1, k + 1, em::ex3); + c202 = EB(i + 1, j - 1, k + 1, em::ex3); + c012 = EB(i - 1, j, k + 1, em::ex3); + c112 = EB(i, j, k + 1, em::ex3); + c212 = EB(i + 1, j, k + 1, em::ex3); + c022 = EB(i - 1, j + 1, k + 1, em::ex3); + c122 = EB(i, j + 1, k + 1, em::ex3); + c222 = EB(i + 1, j + 1, k + 1, em::ex3); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + c00 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c001 * w0x + c101 * w1x + c201 * w2x; + c1 = c011 * w0x + c111 * w1x + c211 * w2x; + c2 = c021 * w0x + c121 * w1x + c221 * w2x; + c01 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c002 * w0x + c102 * w1x + c202 * w2x; + c1 = c012 * w0x + c112 * w1x + c212 * w2x; + c2 = c022 * w0x + c122 * w1x + c222 * w2x; + c02 = c0 * w0y + c1 * w1y + c2 * w2y; + + e0[2] = c00 * w0z + c01 * w1z + c02 * w2z; + + // Bx1 + // Interpolate -- (primal, dual, dual) + c000 = EB(i - 1, j - 1, k - 1, em::bx1); + c100 = EB(i, j - 1, k - 1, em::bx1); + c200 = EB(i + 1, j - 1, k - 1, em::bx1); + c010 = EB(i - 1, j, k - 1, em::bx1); + c110 = EB(i, j, k - 1, em::bx1); + c210 = EB(i + 1, j, k - 1, em::bx1); + c020 = EB(i - 1, j + 1, k - 1, em::bx1); + c120 = EB(i, j + 1, k - 1, em::bx1); + c220 = EB(i + 1, j + 1, k - 1, em::bx1); + + c001 = EB(i - 1, j - 1, k, em::bx1); + c101 = EB(i, j - 1, k, em::bx1); + c201 = EB(i + 1, j - 1, k, em::bx1); + c011 = EB(i - 1, j, k, em::bx1); + c111 = EB(i, j, k, em::bx1); + c211 = EB(i + 1, j, k, em::bx1); + c021 = EB(i - 1, j + 1, k, em::bx1); + c121 = EB(i, j + 1, k, em::bx1); + c221 = EB(i + 1, j + 1, k, em::bx1); + + c002 = EB(i - 1, j - 1, k + 1, em::bx1); + c102 = EB(i, j - 1, k + 1, em::bx1); + c202 = EB(i + 1, j - 1, k + 1, em::bx1); + c012 = EB(i - 1, j, k + 1, em::bx1); + c112 = EB(i, j, k + 1, em::bx1); + c212 = EB(i + 1, 
j, k + 1, em::bx1); + c022 = EB(i - 1, j + 1, k + 1, em::bx1); + c122 = EB(i, j + 1, k + 1, em::bx1); + c222 = EB(i + 1, j + 1, k + 1, em::bx1); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + c00 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c001 * w0x + c101 * w1x + c201 * w2x; + c1 = c011 * w0x + c111 * w1x + c211 * w2x; + c2 = c021 * w0x + c121 * w1x + c221 * w2x; + c01 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c002 * w0x + c102 * w1x + c202 * w2x; + c1 = c012 * w0x + c112 * w1x + c212 * w2x; + c2 = c022 * w0x + c122 * w1x + c222 * w2x; + c02 = c0 * w0y + c1 * w1y + c2 * w2y; + + b0[0] = c00 * w0z + c01 * w1z + c02 * w2z; + + // Bx2 + // Interpolate -- (dual, primal, dual) + c000 = EB(i - 1, j - 1, k - 1, em::bx2); + c100 = EB(i, j - 1, k - 1, em::bx2); + c200 = EB(i + 1, j - 1, k - 1, em::bx2); + c010 = EB(i - 1, j, k - 1, em::bx2); + c110 = EB(i, j, k - 1, em::bx2); + c210 = EB(i + 1, j, k - 1, em::bx2); + c020 = EB(i - 1, j + 1, k - 1, em::bx2); + c120 = EB(i, j + 1, k - 1, em::bx2); + c220 = EB(i + 1, j + 1, k - 1, em::bx2); + + c001 = EB(i - 1, j - 1, k, em::bx2); + c101 = EB(i, j - 1, k, em::bx2); + c201 = EB(i + 1, j - 1, k, em::bx2); + c011 = EB(i - 1, j, k, em::bx2); + c111 = EB(i, j, k, em::bx2); + c211 = EB(i + 1, j, k, em::bx2); + c021 = EB(i - 1, j + 1, k, em::bx2); + c121 = EB(i, j + 1, k, em::bx2); + c221 = EB(i + 1, j + 1, k, em::bx2); + + c002 = EB(i - 1, j - 1, k + 1, em::bx2); + c102 = EB(i, j - 1, k + 1, em::bx2); + c202 = EB(i + 1, j - 1, k + 1, em::bx2); + c012 = EB(i - 1, j, k + 1, em::bx2); + c112 = EB(i, j, k + 1, em::bx2); + c212 = EB(i + 1, j, k + 1, em::bx2); + c022 = EB(i - 1, j + 1, k + 1, em::bx2); + c122 = EB(i, j + 1, k + 1, em::bx2); + c222 = EB(i + 1, j + 1, k + 1, em::bx2); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + c00 = c0 * w0y + c1 * w1y + 
c2 * w2y; + + c0 = c001 * w0x + c101 * w1x + c201 * w2x; + c1 = c011 * w0x + c111 * w1x + c211 * w2x; + c2 = c021 * w0x + c121 * w1x + c221 * w2x; + c01 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c002 * w0x + c102 * w1x + c202 * w2x; + c1 = c012 * w0x + c112 * w1x + c212 * w2x; + c2 = c022 * w0x + c122 * w1x + c222 * w2x; + c02 = c0 * w0y + c1 * w1y + c2 * w2y; + + b0[1] = c00 * w0z + c01 * w1z + c02 * w2z; + + // Bx3 + // Interpolate -- (dual, dual, primal) + c000 = EB(i - 1, j - 1, k - 1, em::bx3); + c100 = EB(i, j - 1, k - 1, em::bx3); + c200 = EB(i + 1, j - 1, k - 1, em::bx3); + c010 = EB(i - 1, j, k - 1, em::bx3); + c110 = EB(i, j, k - 1, em::bx3); + c210 = EB(i + 1, j, k - 1, em::bx3); + c020 = EB(i - 1, j + 1, k - 1, em::bx3); + c120 = EB(i, j + 1, k - 1, em::bx3); + c220 = EB(i + 1, j + 1, k - 1, em::bx3); + + c001 = EB(i - 1, j - 1, k, em::bx3); + c101 = EB(i, j - 1, k, em::bx3); + c201 = EB(i + 1, j - 1, k, em::bx3); + c011 = EB(i - 1, j, k, em::bx3); + c111 = EB(i, j, k, em::bx3); + c211 = EB(i + 1, j, k, em::bx3); + c021 = EB(i - 1, j + 1, k, em::bx3); + c121 = EB(i, j + 1, k, em::bx3); + c221 = EB(i + 1, j + 1, k, em::bx3); + + c002 = EB(i - 1, j - 1, k + 1, em::bx3); + c102 = EB(i, j - 1, k + 1, em::bx3); + c202 = EB(i + 1, j - 1, k + 1, em::bx3); + c012 = EB(i - 1, j, k + 1, em::bx3); + c112 = EB(i, j, k + 1, em::bx3); + c212 = EB(i + 1, j, k + 1, em::bx3); + c022 = EB(i - 1, j + 1, k + 1, em::bx3); + c122 = EB(i, j + 1, k + 1, em::bx3); + c222 = EB(i + 1, j + 1, k + 1, em::bx3); + + c0 = c000 * w0x + c100 * w1x + c200 * w2x; + c1 = c010 * w0x + c110 * w1x + c210 * w2x; + c2 = c020 * w0x + c120 * w1x + c220 * w2x; + c00 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c001 * w0x + c101 * w1x + c201 * w2x; + c1 = c011 * w0x + c111 * w1x + c211 * w2x; + c2 = c021 * w0x + c121 * w1x + c221 * w2x; + c01 = c0 * w0y + c1 * w1y + c2 * w2y; + + c0 = c002 * w0x + c102 * w1x + c202 * w2x; + c1 = c012 * w0x + c112 * w1x + c212 * w2x; + c2 = c022 * w0x + c122 * w1x + 
c222 * w2x; + c02 = c0 * w0y + c1 * w1y + c2 * w2y; + + b0[2] = c00 * w0z + c01 * w1z + c02 * w2z; + } + } + // Extra Inline void boundaryConditions(index_t& p, coord_t& xp) const { if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { From 6eabea9cb9be45c70e5c5e7be0fe61c7670187cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 24 May 2025 18:11:30 -0500 Subject: [PATCH 033/154] bugfix in Jz --- src/kernels/currents_deposit.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 257e3e8f6..76743f44a 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -768,7 +768,7 @@ namespace kernel { const real_t Qdxdt = coeff * inv_dt; const real_t Qdydt = coeff * inv_dt; - const real_t QVz = coeff * inv_dt * vp[2]; + const real_t QVz = coeff * vp[2]; // Esirkepov - Eq. 39 // x-component From 6a9ff16e5d7ec3bb2793f6f90b78de0a7805f5b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Mon, 26 May 2025 13:27:32 -0500 Subject: [PATCH 034/154] update to deposit test --- src/kernels/tests/deposit.cpp | 406 +++++++++++++++++++++++++++++----- 1 file changed, 353 insertions(+), 53 deletions(-) diff --git a/src/kernels/tests/deposit.cpp b/src/kernels/tests/deposit.cpp index d64e4bb2f..bdfbb26a1 100644 --- a/src/kernels/tests/deposit.cpp +++ b/src/kernels/tests/deposit.cpp @@ -81,12 +81,16 @@ void testDeposit(const std::vector& res, array_t tag { "tag", 10 }; const real_t charge { 1.0 }, inv_dt { 1.0 }; - const int i0 = 4, j0 = 4; + const int i0 = 3, j0 = 3; + const int i0f = 3, j0f = 3; + const real_t uz = 0.5; - const prtldx_t dxi = 0.53, dxf = 0.47; - const prtldx_t dyi = 0.34, dyf = 0.52; - const real_t xi = (real_t)i0 + (real_t)dxi, xf = (real_t)i0 + (real_t)dxf; - const real_t yi = (real_t)j0 + (real_t)dyi, yf = (real_t)j0 + (real_t)dyf; + // const prtldx_t dxi = 0.53, dxf = 0.47; + // const prtldx_t dyi = 
0.34, dyf = 0.52; + const prtldx_t dxi = 0.65, dxf = 0.65; + const prtldx_t dyi = 0.65, dyf = 0.65; + const real_t xi = (real_t)i0 + (real_t)dxi, xf = (real_t)i0f + (real_t)dxf; + const real_t yi = (real_t)j0 + (real_t)dyi, yf = (real_t)j0f + (real_t)dyf; const real_t xr = 0.5 * (xi + xf); const real_t yr = 0.5 * (yi + yf); @@ -103,20 +107,33 @@ void testDeposit(const std::vector& res, const real_t Fy1 = (yr - yi); const real_t Fy2 = (yf - yr); + const real_t Fz1 = HALF * uz / math::sqrt(1.0 + uz * uz); + const real_t Fz2 = HALF * uz / math::sqrt(1.0 + uz * uz); + const real_t Jx1 = Fx1 * (1 - Wy1) + Fx2 * (1 - Wy2); const real_t Jx2 = Fx1 * Wy1 + Fx2 * Wy2; const real_t Jy1 = Fy1 * (1 - Wx1) + Fy2 * (1 - Wx2); const real_t Jy2 = Fy1 * Wx1 + Fy2 * Wx2; - put_value(i1, i0, 0); - put_value(i2, j0, 0); + const real_t Jz = Fz1 * (1 - Wx1) + Fz2 * (1 - Wy1) + + Fz1 * Wx1 * (1 - Wy1) + + Fz1 * (1 - Wx1) * Wy1 + + Fz1 * Wx1 * Wy1 + + Fz2 * (1 - Wx2) * (1 - Wy2) + + Fz2 * Wx2 * (1 - Wy2) + + Fz2 * (1 - Wx2) * Wy2 + + Fz2 * Wx2 * Wy2; + + put_value(i1, i0f, 0); + put_value(i2, j0f, 0); put_value(i1_prev, i0, 0); put_value(i2_prev, j0, 0); put_value(dx1, dxf, 0); put_value(dx2, dyf, 0); put_value(dx1_prev, dxi, 0); put_value(dx2_prev, dyi, 0); + put_value(ux3, uz, 0); put_value(weight, 1.0, 0); put_value(tag, ParticleTag::alive, 0); @@ -124,7 +141,7 @@ void testDeposit(const std::vector& res, // clang-format off Kokkos::parallel_for("CurrentsDeposit", 10, - kernel::DepositCurrents_kernel(J_scat, + kernel::DepositCurrents_kernel(J_scat, i1, i2, i3, i1_prev, i2_prev, i3_prev, dx1, dx2, dx3, @@ -140,7 +157,7 @@ void testDeposit(const std::vector& res, { N_GHOSTS, N_GHOSTS }, { nx1 + N_GHOSTS, nx2 + N_GHOSTS }); - real_t SumDivJ = ZERO, SumJx = ZERO, SumJy = ZERO; + real_t SumDivJ = ZERO, SumJx = ZERO, SumJy = ZERO, SumJz = ZERO; Kokkos::parallel_reduce( "SumDivJ", range, @@ -162,6 +179,12 @@ void testDeposit(const std::vector& res, Lambda(const int i, const int j, real_t& sum) 
{ sum += J(i, j, cur::jx2); }, SumJy); + Kokkos::parallel_reduce( + "SumJy", + range, + Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx3); }, + SumJz); + auto J_h = Kokkos::create_mirror_view(J); Kokkos::deep_copy(J_h, J); @@ -171,6 +194,7 @@ void testDeposit(const std::vector& res, std::cout << "SumJx: " << SumJx << " expected " << Jx1 + Jx2 << std::endl; std::cout << "SumJy: " << SumJy << " expected " << Jy1 + Jy2 << std::endl; + std::cout << "SumJz: " << SumJz << " expected " << Jz << std::endl; // errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + N_GHOSTS, cur::jx1), Jx1, "", acc), // "DepositCurrents_kernel::Jx1 is incorrect"); // errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + 1 + N_GHOSTS, cur::jx1), Jx2, "", acc), @@ -181,6 +205,282 @@ void testDeposit(const std::vector& res, // "DepositCurrents_kernel::Jy2 is incorrect"); } +// void ind_pond(real_t Rcoord, int* Iindices, real_t* Rpond) { + +// // Assuming interp_order is an integer and Rcoord is a double +// int i_min = std::floor(Rcoord - HALF); + +// // Populate Iindices +// for (int i = 0; i < 3; ++i) { +// Iindices[i] = i_min + i; +// } + +// // Eq. 
24 +// Rpond[0] = 0.5 * std::pow(0.5 + (static_cast(Iindices[1]) - Rcoord), 2); +// Rpond[1] = 0.75 - std::pow(static_cast(Iindices[1]) - Rcoord, 2); +// Rpond[2] = 0.5 * std::pow(0.5 - (static_cast(Iindices[1]) - Rcoord), 2); +// } + +// template +// void testDeposit_2nd(const std::vector& res, +// const boundaries_t& ext, +// const std::map& params = {}, +// const real_t acc = ONE) { +// static_assert(M::Dim == 2); +// errorIf(res.size() != M::Dim, "res.size() != M::Dim"); +// using namespace ntt; + +// M metric { res, ext, params }; + +// const auto nx1 = res[0]; +// const auto nx2 = res[1]; + +// ndfield_t J { "J", nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }; +// array_t i1 { "i1", 10 }; +// array_t i2 { "i2", 10 }; +// array_t i3 { "i3", 10 }; +// array_t i1_prev { "i1_prev", 10 }; +// array_t i2_prev { "i2_prev", 10 }; +// array_t i3_prev { "i3_prev", 10 }; +// array_t dx1 { "dx1", 10 }; +// array_t dx2 { "dx2", 10 }; +// array_t dx3 { "dx3", 10 }; +// array_t dx1_prev { "dx1_prev", 10 }; +// array_t dx2_prev { "dx2_prev", 10 }; +// array_t dx3_prev { "dx3_prev", 10 }; +// array_t ux1 { "ux1", 10 }; +// array_t ux2 { "ux2", 10 }; +// array_t ux3 { "ux3", 10 }; +// array_t phi { "phi", 10 }; +// array_t weight { "weight", 10 }; +// array_t tag { "tag", 10 }; +// const real_t charge { 1.0 }, inv_dt { 1.0 }; + +// const int i0 = 4, j0 = 4; + +// // initial and final positions +// const prtldx_t dxi = 0.53, dxf = 0.47; +// const prtldx_t dyi = 0.34, dyf = 0.52; +// const real_t xi = (real_t)i0 + (real_t)dxi, xf = (real_t)i0 + (real_t)dxf; +// const real_t yi = (real_t)j0 + (real_t)dyi, yf = (real_t)j0 + (real_t)dyf; + +// // const real_t xr = 0.5 * (xi + xf); +// // const real_t yr = 0.5 * (yi + yf); + +// // const real_t Wx1 = 0.5 * (xi + xr) - (real_t)i0; +// // const real_t Wx2 = 0.5 * (xf + xr) - (real_t)i0; + +// // const real_t Wy1 = 0.5 * (yi + yr) - (real_t)j0; +// // const real_t Wy2 = 0.5 * (yf + yr) - (real_t)j0; + +// // const real_t Fx1 = (xr - xi); +// 
// const real_t Fx2 = (xf - xr); + +// // const real_t Fy1 = (yr - yi); +// // const real_t Fy2 = (yf - yr); + +// // const real_t Jx1 = Fx1 * (1 - Wy1) + Fx2 * (1 - Wy2); +// // const real_t Jx2 = Fx1 * Wy1 + Fx2 * Wy2; + +// // const real_t Jy1 = Fy1 * (1 - Wx1) + Fy2 * (1 - Wx2); +// // const real_t Jy2 = Fy1 * Wx1 + Fy2 * Wx2; + +// // Define interp_order +// constexpr int interp_order = 2; +// const real_t aux_jx = 1.0; +// const real_t aux_jy = 1.0; +// const real_t aux_jz = 1.0; + +// // Arrays with size (interp_order + 1) +// std::array ISx1, ISx2; +// std::array PondSx1, PondSx2; +// std::array ISy1, ISy2; +// std::array PondSy1, PondSy2; + +// // 2D arrays with size (interp_order + 2) x (interp_order + 2) +// std::array, interp_order + 2> WEsirkx, +// WEsirky, WEsirkz; +// std::array, interp_order + 2> jx_local, +// jy_local; + +// std::array, 10> jx, jy, jz; +// std::fill(jx.begin(), jx.end(), 0.0); +// std::fill(jy.begin(), jy.end(), 0.0); +// std::fill(jz.begin(), jz.end(), 0.0); +// // 1D arrays with size (interp_order + 2) +// std::array Sx2, Sx1, Sy2, Sy1; + +// // Interpolation coefficients +// ind_pond(xi, &ISx1, &PondSx1); +// ind_pond(xf, &ISx2, &PondSx2); +// ind_pond(yi, &ISy1, &PondSy1); +// ind_pond(yf, &ISy2, &PondSy2); + +// int min_x, max_x; +// int min_y, max_y; + +// // Esirkepov coefficients W +// int shift_Ix = ISx2[0] - ISx1[0]; +// std::fill(Sx2.begin(), Sx2.end(), 0.0); +// std::fill(Sx1.begin(), Sx1.end(), 0.0); + +// if (shift_Ix == 0) { +// std::copy(PondSx2.begin(), PondSx2.end(), Sx2.begin()); +// std::copy(PondSx1.begin(), PondSx1.end(), Sx1.begin()); +// min_x = ISx2[0]; +// max_x = ISx2[interp_order]; +// } else if (shift_Ix == 1) { +// std::copy(PondSx2.begin(), PondSx2.end(), Sx2.begin() + 1); +// std::copy(PondSx1.begin(), PondSx1.end(), Sx1.begin()); +// min_x = ISx1[0]; +// max_x = ISx2[interp_order]; +// } else if (shift_Ix == -1) { +// std::copy(PondSx2.begin(), PondSx2.end(), Sx2.begin()); +// 
std::copy(PondSx1.begin(), PondSx1.end(), Sx1.begin() + 1); +// min_x = ISx2[0]; +// max_x = ISx1[interp_order]; +// } + +// int shift_Iy = ISy2[0] - ISy1[0]; +// std::fill(Sy2.begin(), Sy2.end(), 0.0); +// std::fill(Sy1.begin(), Sy1.end(), 0.0); + +// if (shift_Iy == 0) { +// std::copy(PondSy2.begin(), PondSy2.end(), Sy2.begin()); +// std::copy(PondSy1.begin(), PondSy1.end(), Sy1.begin()); +// min_y = ISy2[0]; +// max_y = ISy2[interp_order]; +// } else if (shift_Iy == 1) { +// std::copy(PondSy2.begin(), PondSy2.end(), Sy2.begin() + 1); +// std::copy(PondSy1.begin(), PondSy1.end(), Sy1.begin()); +// min_y = ISy1[0]; +// max_y = ISy2[interp_order]; +// } else if (shift_Iy == -1) { +// std::copy(PondSy2.begin(), PondSy2.end(), Sy2.begin()); +// std::copy(PondSy1.begin(), PondSy1.end(), Sy1.begin() + 1); +// min_y = ISy2[0]; +// max_y = ISy1[interp_order]; +// } + +// for (int i = 0; i < interp_order + 2; ++i) { +// for (int j = 0; j < interp_order + 2; ++j) { +// WEsirkx[i][j] = 0.5 * (Sx2[i] - Sx1[i]) * (Sy2[j] + Sy1[j]); +// WEsirky[i][j] = 0.5 * (Sx2[i] + Sx1[i]) * (Sy2[j] - Sy1[j]); +// WEsirkz[i][j] = THIRD * (Sy2[j] * (0.5 * Sx1[i] + Sx2[i]) + +// Sy1[j] * (0.5 * Sx2[i] + Sx1[i])); +// } +// } + +// // Current deposition jx +// for (int j = 0; j < interp_order + 2; ++j) { +// jx_local[0][j] = -aux_jx * WEsirkx[0][j]; +// } +// for (int i = 1; i < interp_order + 2; ++i) { +// for (int j = 0; j < interp_order + 2; ++j) { +// jx_local[i][j] = jx_local[i - 1][j] - aux_jx * WEsirkx[i][j]; +// } +// } +// for (int i = 0; i < max_x - min_x; ++i) { +// for (int j = 0; j < max_y - min_y + 1; ++j) { +// jx[min_x + i][min_y + j] += jx_local[i][j]; +// } +// } + +// // Current deposition jy +// for (int i = 0; i < interp_order + 2; ++i) { +// jy_local[i][0] = -aux_jy * WEsirky[i][0]; +// } +// for (int j = 1; j < interp_order + 2; ++j) { +// for (int i = 0; i < interp_order + 2; ++i) { +// jy_local[i][j] = jy_local[i][j - 1] - aux_jy * WEsirky[i][j]; +// } +// } +// for 
(int i = 0; i < max_x - min_x + 1; ++i) { +// for (int j = 0; j < max_y - min_y; ++j) { +// jy[min_x + i][min_y + j] += jy_local[i][j]; +// } +// } + +// // Current deposition jz +// for (int i = 0; i < max_x - min_x + 1; ++i) { +// for (int j = 0; j < max_y - min_y + 1; ++j) { +// jz[min_x + i][min_y + j] += aux_jz * WEsirkz[i][j]; +// } +// } + +// // define particle positions +// put_value(i1, i0, 0); +// put_value(i2, j0, 0); +// put_value(i1_prev, i0, 0); +// put_value(i2_prev, j0, 0); +// put_value(dx1, dxf, 0); +// put_value(dx2, dyf, 0); +// put_value(dx1_prev, dxi, 0); +// put_value(dx2_prev, dyi, 0); +// put_value(weight, 1.0, 0); +// put_value(tag, ParticleTag::alive, 0); + +// auto J_scat = Kokkos::Experimental::create_scatter_view(J); + +// // clang-format off +// Kokkos::parallel_for("CurrentsDeposit", 10, +// kernel::DepositCurrents_kernel(J_scat, +// i1, i2, i3, +// i1_prev, i2_prev, i3_prev, +// dx1, dx2, dx3, +// dx1_prev, dx2_prev, dx3_prev, +// ux1, ux2, ux3, +// phi, weight, tag, +// metric, charge, inv_dt)); +// // clang-format on + +// Kokkos::Experimental::contribute(J, J_scat); + +// const auto range = Kokkos::MDRangePolicy>( +// { N_GHOSTS, N_GHOSTS }, +// { nx1 + N_GHOSTS, nx2 + N_GHOSTS }); + +// real_t SumDivJ = ZERO, SumJx = ZERO, SumJy = ZERO; +// Kokkos::parallel_reduce( +// "SumDivJ", +// range, +// Lambda(const int i, const int j, real_t& sum) { +// sum += J(i, j, cur::jx1) - J(i - 1, j, cur::jx1) + J(i, j, cur::jx2) - +// J(i, j - 1, cur::jx2); +// }, +// SumDivJ); + +// Kokkos::parallel_reduce( +// "SumJx", +// range, +// Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx1); }, +// SumJx); + +// Kokkos::parallel_reduce( +// "SumJy", +// range, +// Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx2); }, +// SumJy); + +// auto J_h = Kokkos::create_mirror_view(J); +// Kokkos::deep_copy(J_h, J); + +// if (not cmp::AlmostZero(SumDivJ)) { +// throw 
std::logic_error("DepositCurrents_kernel::SumDivJ != 0"); +// } + +// // std::cout << "SumJx: " << SumJx << " expected " << Jx1 + Jx2 << std::endl; +// // std::cout << "SumJy: " << SumJy << " expected " << Jy1 + Jy2 << std::endl; +// errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + N_GHOSTS, cur::jx1), jx[i0][j0], "", acc), +// "DepositCurrents_kernel::Jx1 is incorrect"); +// errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + 1 + N_GHOSTS, cur::jx1), jx[i0][j0+1], "", acc), +// "DepositCurrents_kernel::Jx2 is incorrect"); +// errorIf(not equal(J_h(i0 + N_GHOSTS, j0 + N_GHOSTS, cur::jx2), jy[i0][j0], "", acc), +// "DepositCurrents_kernel::Jy1 is incorrect"); +// errorIf(not equal(J_h(i0 + 1 + N_GHOSTS, j0 + N_GHOSTS, cur::jx2), jy[i0][j0+1], "", acc), +// "DepositCurrents_kernel::Jy2 is incorrect"); +// } + auto main(int argc, char* argv[]) -> int { Kokkos::initialize(argc, argv); @@ -197,50 +497,50 @@ auto main(int argc, char* argv[]) -> int { {}, 500); - testDeposit, SimEngine::SRPIC>( - { - 10, - 10 - }, - { { 1.0, 100.0 } }, - {}, - 500); - - testDeposit, SimEngine::SRPIC>( - { - 10, - 10 - }, - { { 1.0, 100.0 } }, - { { "r0", 0.0 }, { "h", 0.25 } }, - 500); - - testDeposit, SimEngine::GRPIC>( - { - 10, - 10 - }, - { { 1.0, 100.0 } }, - { { "a", 0.9 } }, - 500); - - testDeposit, SimEngine::GRPIC>( - { - 10, - 10 - }, - { { 1.0, 100.0 } }, - { { "r0", 0.0 }, { "h", 0.25 }, { "a", 0.9 } }, - 500); - - testDeposit, SimEngine::GRPIC>( - { - 10, - 10 - }, - { { 1.0, 100.0 } }, - { { "a", 0.9 } }, - 500); + // testDeposit, SimEngine::SRPIC>( + // { + // 10, + // 10 + // }, + // { { 1.0, 100.0 } }, + // {}, + // 500); + + // testDeposit, SimEngine::SRPIC>( + // { + // 10, + // 10 + // }, + // { { 1.0, 100.0 } }, + // { { "r0", 0.0 }, { "h", 0.25 } }, + // 500); + + // testDeposit, SimEngine::GRPIC>( + // { + // 10, + // 10 + // }, + // { { 1.0, 100.0 } }, + // { { "a", 0.9 } }, + // 500); + + // testDeposit, SimEngine::GRPIC>( + // { + // 10, + // 10 + // }, + // { { 1.0, 100.0 } }, + 
// { { "r0", 0.0 }, { "h", 0.25 }, { "a", 0.9 } }, + // 500); + + // testDeposit, SimEngine::GRPIC>( + // { + // 10, + // 10 + // }, + // { { 1.0, 100.0 } }, + // { { "a", 0.9 } }, + // 500); } catch (std::exception& e) { std::cerr << e.what() << std::endl; From 56763b8998aea724641c6dae4e556ac1f47cb319 Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 26 May 2025 14:28:11 -0400 Subject: [PATCH 035/154] kernel error --- src/kernels/currents_deposit.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 76743f44a..0b7409f4a 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -148,7 +148,7 @@ namespace kernel { S1_2 = HALF * SQR(HALF - di_center); S1_3 = ZERO; } else { - raise::Error("Invalid shift in indices", HERE); + raise::KernelError(HERE, "Invalid shift in indices"); } // account for ghost cells here to shorten J update expression @@ -285,7 +285,7 @@ namespace kernel { S1_3 = static_cast(1 / 6) * di_center3; S1_4 = ZERO; } else { - raise::Error("Invalid shift in indices", HERE); + raise::KernelError(HERE, "Invalid shift in indices"); } // account for ghost cells here to shorten J update expression @@ -862,7 +862,7 @@ namespace kernel { J_acc(ix_min + 3, iy_min + 2, cur::jx2) += jy_3_2; } /* - z - component, simulated direction + z - component, unsimulated direction */ J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; @@ -931,7 +931,7 @@ namespace kernel { i3(p), dx3(p), i3_prev(p), dx3_prev(p)); // clang-format on - + // Calculate weight function // for (int i = 0; i < interp_order + 2; ++i) { // for (int j = 0; j < interp_order + 2; ++j) { @@ -2188,4 +2188,4 @@ namespace kernel { #undef i_di_to_Xi -#endif // KERNELS_CURRENTS_DEPOSIT_HPP \ No newline at end of file +#endif // KERNELS_CURRENTS_DEPOSIT_HPP From 84fcb6c372bb228e43b9b05910e6037d2d354bc7 Mon Sep 17 00:00:00 2001 From: 
hayk Date: Mon, 26 May 2025 14:40:35 -0400 Subject: [PATCH 036/154] deposit test fixed --- src/kernels/tests/deposit.cpp | 38 ++++++++++++++++------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/src/kernels/tests/deposit.cpp b/src/kernels/tests/deposit.cpp index bdfbb26a1..3ccb49993 100644 --- a/src/kernels/tests/deposit.cpp +++ b/src/kernels/tests/deposit.cpp @@ -81,14 +81,14 @@ void testDeposit(const std::vector& res, array_t tag { "tag", 10 }; const real_t charge { 1.0 }, inv_dt { 1.0 }; - const int i0 = 3, j0 = 3; - const int i0f = 3, j0f = 3; - const real_t uz = 0.5; - - // const prtldx_t dxi = 0.53, dxf = 0.47; - // const prtldx_t dyi = 0.34, dyf = 0.52; - const prtldx_t dxi = 0.65, dxf = 0.65; - const prtldx_t dyi = 0.65, dyf = 0.65; + const int i0 = 4, j0 = 4; + const int i0f = 3, j0f = 3; + const real_t uz = 2.5; + + // const prtldx_t dxi = 0.53, dxf = 0.47; + // const prtldx_t dyi = 0.34, dyf = 0.52; + const prtldx_t dxi = 0.65, dxf = 0.99; + const prtldx_t dyi = 0.65, dyf = 0.80; const real_t xi = (real_t)i0 + (real_t)dxi, xf = (real_t)i0f + (real_t)dxf; const real_t yi = (real_t)j0 + (real_t)dyi, yf = (real_t)j0f + (real_t)dyf; @@ -116,14 +116,10 @@ void testDeposit(const std::vector& res, const real_t Jy1 = Fy1 * (1 - Wx1) + Fy2 * (1 - Wx2); const real_t Jy2 = Fy1 * Wx1 + Fy2 * Wx2; - const real_t Jz = Fz1 * (1 - Wx1) + Fz2 * (1 - Wy1) + - Fz1 * Wx1 * (1 - Wy1) + - Fz1 * (1 - Wx1) * Wy1 + - Fz1 * Wx1 * Wy1 + - Fz2 * (1 - Wx2) * (1 - Wy2) + - Fz2 * Wx2 * (1 - Wy2) + - Fz2 * (1 - Wx2) * Wy2 + - Fz2 * Wx2 * Wy2; + const real_t Jz = Fz1 * (1 - Wx1) * (1 - Wy1) + Fz1 * Wx1 * (1 - Wy1) + + Fz1 * (1 - Wx1) * Wy1 + Fz1 * Wx1 * Wy1 + + Fz2 * (1 - Wx2) * (1 - Wy2) + Fz2 * Wx2 * (1 - Wy2) + + Fz2 * (1 - Wx2) * Wy2 + Fz2 * Wx2 * Wy2; put_value(i1, i0f, 0); put_value(i2, j0f, 0); @@ -141,7 +137,7 @@ void testDeposit(const std::vector& res, // clang-format off Kokkos::parallel_for("CurrentsDeposit", 10, - 
kernel::DepositCurrents_kernel(J_scat, + kernel::DepositCurrents_kernel(J_scat, i1, i2, i3, i1_prev, i2_prev, i3_prev, dx1, dx2, dx3, @@ -453,14 +449,14 @@ void testDeposit(const std::vector& res, // Kokkos::parallel_reduce( // "SumJx", // range, -// Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx1); }, -// SumJx); +// Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx1); +// }, SumJx); // Kokkos::parallel_reduce( // "SumJy", // range, -// Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx2); }, -// SumJy); +// Lambda(const int i, const int j, real_t& sum) { sum += J(i, j, cur::jx2); +// }, SumJy); // auto J_h = Kokkos::create_mirror_view(J); // Kokkos::deep_copy(J_h, J); From 9b8985d4916d4f5056cd161dba91838046934df6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 27 May 2025 14:34:07 -0500 Subject: [PATCH 037/154] fixes to pusher --- src/kernels/particle_pusher_sr.hpp | 883 ++++++++++++++++------------- 1 file changed, 475 insertions(+), 408 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 2d89f5875..7ccd06a80 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -475,9 +475,9 @@ namespace kernel::sr { vec_t ei_Cart_rad { ZERO }, bi_Cart_rad { ZERO }; bool is_gca { false }; - getInterpFlds(p, ei, bi); - // ToDo: Better way to call this - //getInterpFlds2nd(p, ei, bi); + // getInterpFlds(p, ei, bi); + // ToDo: Better way to call this + getInterpFlds2nd(p, ei, bi); metric.template transform_xyz(xp_Cd, ei, ei_Cart); metric.template transform_xyz(xp_Cd, bi, bi_Cart); @@ -1100,46 +1100,59 @@ namespace kernel::sr { const int i { i1(p) + static_cast(N_GHOSTS) }; const auto dx1_ { static_cast(dx1(p)) }; + const int dx1_less_half = static_cast(dx1_ < + static_cast(0.5)); + const auto dx1_center = static_cast(dx1_less_half) - dx1_; + + // direct interpolation of staggered grid + // primal = i, 
dual = i+ind + const int ind = static_cast(static_cast(dx1_ + HALF)); + // Compute weights for second-order interpolation - real_t w0 = HALF * SQR(HALF - dx1_); - real_t w1 = static_cast(0.75) - SQR(dx1_); - real_t w2 = HALF * SQR(HALF + dx1_); + // primal + const auto wp0 = HALF * SQR(HALF - dx1_); + const auto wp1 = static_cast(0.75) - SQR(dx1_); + const auto wp2 = HALF * SQR(HALF + dx1_); + // dual - ToDo! + const auto wd0 = HALF * SQR(HALF - dx1_); + const auto wd1 = static_cast(0.75) - SQR(dx1_); + const auto wd2 = HALF * SQR(HALF + dx1_); // Ex1 (dual grid) - real_t c0 = EB(i - 1, em::ex1); // First grid point - real_t c1 = EB(i, em::ex1); // Second grid point - real_t c2 = EB(i + 1, em::ex1); // Third grid point - e0[0] = c0 * w0 + c1 * w1 + c2 * w2; + const auto ex1_0 = EB(ind + i - 1, em::ex1); + const auto ex1_1 = EB(ind + i, em::ex1); + const auto ex1_2 = EB(ind + i + 1, em::ex1); + e0[0] = ex1_0 * wd0 + ex1_1 * wd0 + ex1_2 * wd0; // Ex2 (primal grid) - c0 = EB(i - 1, em::ex2); // First grid point - c1 = EB(i, em::ex2); // Second grid point - c2 = EB(i + 1, em::ex2); // Third grid point - e0[1] = c0 * w0 + c1 * w1 + c2 * w2; + const auto ex2_0 = EB(i - 1, em::ex2); + const auto ex2_1 = EB(i, em::ex2); + const auto ex2_2 = EB(i + 1, em::ex2); + e0[1] = ex2_0 * wp0 + ex2_1 * wp1 + ex2_2 * wp2; // Ex3 (primal grid) - c0 = EB(i - 1, em::ex3); // First grid point - c1 = EB(i, em::ex3); // Second grid point - c2 = EB(i + 1, em::ex3); // Third grid point - e0[2] = c0 * w0 + c1 * w1 + c2 * w2; + const auto ex3_0 = EB(i - 1, em::ex3); + const auto ex3_1 = EB(i, em::ex3); // Second grid point + const auto ex3_2 = EB(i + 1, em::ex3); + e0[2] = ex3_0 * wp0 + ex3_1 * wp1 + ex3_2 * wp2; // Bx1 (primal grid) - c0 = EB(i - 1, em::bx1); // First grid point - c1 = EB(i, em::bx1); // Second grid point - c2 = EB(i + 1, em::bx1); // Third grid point - b0[0] = c0 * w0 + c1 * w1 + c2 * w2; + const auto bx1_0 = EB(i - 1, em::bx1); + const auto bx1_1 = EB(i, em::bx1); // Second 
grid point + const auto bx1_2 = EB(i + 1, em::bx1); + b0[0] = bx1_0 * wp0 + bx1_1 * wp1 + bx1_2 * wp2; // Bx2 (dual grid) - c0 = EB(i - 2, em::bx2); // First grid point - c1 = EB(i - 1, em::bx2); // Second grid point - c2 = EB(i, em::bx2); // Third grid point - b0[1] = c0 * w0 + c1 * w1 + c2 * w2; + const auto bx2_0 = EB(ind + i - 2, em::bx2); + const auto bx2_1 = EB(ind + i - 1, em::bx2); // Second grid point + const auto bx2_2 = EB(ind + i, em::bx2); + b0[1] = bx2_0 * wd0 + bx2_1 * wd1 + bx2_2 * wd2; // Bx3 (dual grid) - c0 = EB(i - 2, em::bx3); // First grid point - c1 = EB(i - 1, em::bx3); // Second grid point - c2 = EB(i, em::bx3); // Third grid point - b0[2] = c0 * w0 + c1 * w1 + c2 * w2; + const auto bx3_0 = EB(ind + i - 2, em::bx3); + const auto bx3_1 = EB(ind + i - 1, em::bx3); // Second grid point + const auto bx3_2 = EB(ind + i, em::bx3); + b0[2] = bx3_0 * wd0 + bx3_1 * wd1 + bx3_2 * wd2; } else if constexpr (D == Dim::_2D) { const int i { i1(p) + static_cast(N_GHOSTS) }; @@ -1147,116 +1160,149 @@ namespace kernel::sr { const auto dx1_ { static_cast(dx1(p)) }; const auto dx2_ { static_cast(dx2(p)) }; - // Compute weights for second-order interpolation - real_t w0x = HALF * SQR(HALF - dx1_); - real_t w1x = static_cast(0.75) - SQR(dx1_); - real_t w2x = HALF * SQR(HALF + dx1_); + const int dx1_less_half = static_cast(dx1_ < + static_cast(0.5)); + const auto dx1_center = static_cast(dx1_less_half) - dx1_; - real_t w0y = HALF * SQR(HALF - dx2_); - real_t w1y = static_cast(0.75) - SQR(dx2_); - real_t w2y = HALF * SQR(HALF + dx2_); + const int dx2_less_half = static_cast(dx2_ < + static_cast(0.5)); + const auto dx2_center = static_cast(dx2_less_half) - dx2_; + + // direct interpolation of staggered grid + // primal = i, dual = i+ind + const int indx = static_cast(static_cast(dx1_ + HALF)); + const int indy = static_cast(static_cast(dx2_ + HALF)); + + // Compute weights for second-order interpolation + // primal + const auto w0px = HALF * SQR(HALF + dx1_center); 
+ const auto w1px = static_cast(0.75) - SQR(dx1_center); + const auto w2px = HALF * SQR(HALF - dx1_center); + const auto w0py = HALF * SQR(HALF + dx2_center); + const auto w1py = static_cast(0.75) - SQR(dx2_center); + const auto w2py = HALF * SQR(HALF - dx2_center); + + // dual - ToDo! + const auto w0dx = HALF * SQR(HALF + dx1_center); + const auto w1dx = static_cast(0.75) - SQR(dx1_center); + const auto w2dx = HALF * SQR(HALF - dx1_center); + const auto w0dy = HALF * SQR(HALF + dx2_center); + const auto w1dy = static_cast(0.75) - SQR(dx2_center); + const auto w2dy = HALF * SQR(HALF - dx2_center); // Ex1 // Interpolate --- (dual, primal) - real_t c000 = EB(i - 1, j - 1, em::ex1); - real_t c100 = EB(i, j - 1, em::ex1); - real_t c200 = EB(i + 1, j - 1, em::ex1); - real_t c010 = EB(i - 1, j, em::ex1); - real_t c110 = EB(i, j, em::ex1); - real_t c210 = EB(i + 1, j, em::ex1); - real_t c020 = EB(i - 1, j + 1, em::ex1); - real_t c120 = EB(i, j + 1, em::ex1); - real_t c220 = EB(i + 1, j + 1, em::ex1); - - real_t c0 = c000 * w0x + c100 * w1x + c200 * w2x; - real_t c1 = c010 * w0x + c110 * w1x + c210 * w2x; - real_t c2 = c020 * w0x + c120 * w1x + c220 * w2x; - e0[0] = c0 * w0y + c1 * w1y + c2 * w2y; + // clang-format off + const auto ex1_000 = EB(indx + i - 1, j - 1, em::ex1); + const auto ex1_100 = EB(indx + i, j - 1, em::ex1); + const auto ex1_200 = EB(indx + i + 1, j - 1, em::ex1); + const auto ex1_010 = EB(indx + i - 1, j, em::ex1); + const auto ex1_110 = EB(indx + i, j, em::ex1); + const auto ex1_210 = EB(indx + i + 1, j, em::ex1); + const auto ex1_020 = EB(indx + i - 1, j + 1, em::ex1); + const auto ex1_120 = EB(indx + i, j + 1, em::ex1); + const auto ex1_220 = EB(indx + i + 1, j + 1, em::ex1); + // clang-format on + + const auto ex1_0 = ex1_000 * w0dx + ex1_100 * w1dx + ex1_200 * w2dx; + const auto ex1_1 = ex1_010 * w0dx + ex1_110 * w1dx + ex1_210 * w2dx; + const auto ex1_2 = ex1_020 * w0dx + ex1_120 * w1dx + ex1_220 * w2dx; + e0[0] = ex1_0 * w0py + ex1_1 * w1py + 
ex1_2 * w2py; // Ex2 // Interpolate --- (primal, dual) - c000 = EB(i - 1, j - 1, em::ex2); - c100 = EB(i, j - 1, em::ex2); - c200 = EB(i + 1, j - 1, em::ex2); - c010 = EB(i - 1, j, em::ex2); - c110 = EB(i, j, em::ex2); - c210 = EB(i + 1, j, em::ex2); - c020 = EB(i - 1, j + 1, em::ex2); - c120 = EB(i, j + 1, em::ex2); - c220 = EB(i + 1, j + 1, em::ex2); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - e0[1] = c0 * w0y + c1 * w1y + c2 * w2y; + // clang-format off + const auto ex2_000 = EB(i - 1, indy + j - 1, em::ex2); + const auto ex2_100 = EB(i, indy + j - 1, em::ex2); + const auto ex2_200 = EB(i + 1, indy + j - 1, em::ex2); + const auto ex2_010 = EB(i - 1, indy + j, em::ex2); + const auto ex2_110 = EB(i, indy + j, em::ex2); + const auto ex2_210 = EB(i + 1, indy + j, em::ex2); + const auto ex2_020 = EB(i - 1, indy + j + 1, em::ex2); + const auto ex2_120 = EB(i, indy + j + 1, em::ex2); + const auto ex2_220 = EB(i + 1, indy + j + 1, em::ex2); + // clang-format on + + const auto ex2_0 = ex2_000 * w0px + ex2_100 * w1px + ex2_200 * w2px; + const auto ex2_1 = ex2_010 * w0px + ex2_110 * w1px + ex2_210 * w2px; + const auto ex2_2 = ex2_020 * w0px + ex2_120 * w1px + ex2_220 * w2px; + e0[1] = ex2_0 * w0dy + ex2_1 * w1dy + ex2_2 * w2dy; // Ex3 // Interpolate --- (primal, primal) - c000 = EB(i - 1, j - 1, em::ex3); - c100 = EB(i, j - 1, em::ex3); - c200 = EB(i + 1, j - 1, em::ex3); - c010 = EB(i - 1, j, em::ex3); - c110 = EB(i, j, em::ex3); - c210 = EB(i + 1, j, em::ex3); - c020 = EB(i - 1, j + 1, em::ex3); - c120 = EB(i, j + 1, em::ex3); - c220 = EB(i + 1, j + 1, em::ex3); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - e0[2] = c0 * w0y + c1 * w1y + c2 * w2y; + // clang-format off + const auto ex3_000 = EB(i - 1, j - 1, em::ex3); + const auto ex3_100 = EB(i, j - 1, em::ex3); + const auto ex3_200 = 
EB(i + 1, j - 1, em::ex3); + const auto ex3_010 = EB(i - 1, j, em::ex3); + const auto ex3_110 = EB(i, j, em::ex3); + const auto ex3_210 = EB(i + 1, j, em::ex3); + const auto ex3_020 = EB(i - 1, j + 1, em::ex3); + const auto ex3_120 = EB(i, j + 1, em::ex3); + const auto ex3_220 = EB(i + 1, j + 1, em::ex3); + // clang-format on + + const auto ex3_0 = ex3_000 * w0px + ex3_100 * w1px + ex3_200 * w2px; + const auto ex3_1 = ex3_010 * w0px + ex3_110 * w1px + ex3_210 * w2px; + const auto ex3_2 = ex3_020 * w0px + ex3_120 * w1px + ex3_220 * w2px; + e0[2] = ex3_0 * w0py + ex3_1 * w1py + ex3_2 * w2py; // Bx1 // Interpolate --- (primal, dual) - c000 = EB(i - 1, j - 1, em::bx1); - c100 = EB(i, j - 1, em::bx1); - c200 = EB(i + 1, j - 1, em::bx1); - c010 = EB(i - 1, j, em::bx1); - c110 = EB(i, j, em::bx1); - c210 = EB(i + 1, j, em::bx1); - c020 = EB(i - 1, j + 1, em::bx1); - c120 = EB(i, j + 1, em::bx1); - c220 = EB(i + 1, j + 1, em::bx1); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - b0[0] = c0 * w0y + c1 * w1y + c2 * w2y; + // clang-format off + const auto bx1_000 = EB(i - 1, indy + j - 1, em::bx1); + const auto bx1_100 = EB(i, indy + j - 1, em::bx1); + const auto bx1_200 = EB(i + 1, indy + j - 1, em::bx1); + const auto bx1_010 = EB(i - 1, indy + j, em::bx1); + const auto bx1_110 = EB(i, indy + j, em::bx1); + const auto bx1_210 = EB(i + 1, indy + j, em::bx1); + const auto bx1_020 = EB(i - 1, indy + j + 1, em::bx1); + const auto bx1_120 = EB(i, indy + j + 1, em::bx1); + const auto bx1_220 = EB(i + 1, indy + j + 1, em::bx1); + // clang-format on + + const auto bx1_0 = bx1_000 * w0px + bx1_100 * w1px + bx1_200 * w2px; + const auto bx1_1 = bx1_010 * w0px + bx1_110 * w1px + bx1_210 * w2px; + const auto bx1_2 = bx1_020 * w0px + bx1_120 * w1px + bx1_220 * w2px; + b0[0] = bx1_0 * w0dy + bx1_1 * w1dy + bx1_2 * w2dy; // Bx2 // Interpolate --- (dual, primal) - c000 = EB(i - 1, j - 1, em::bx2); - 
c100 = EB(i, j - 1, em::bx2); - c200 = EB(i + 1, j - 1, em::bx2); - c010 = EB(i - 1, j, em::bx2); - c110 = EB(i, j, em::bx2); - c210 = EB(i + 1, j, em::bx2); - c020 = EB(i - 1, j + 1, em::bx2); - c120 = EB(i, j + 1, em::bx2); - c220 = EB(i + 1, j + 1, em::bx2); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - b0[1] = c0 * w0y + c1 * w1y + c2 * w2y; + // clang-format off + const auto bx2_000 = EB(indx + i - 1, j - 1, em::bx2); + const auto bx2_100 = EB(indx + i, j - 1, em::bx2); + const auto bx2_200 = EB(indx + i + 1, j - 1, em::bx2); + const auto bx2_010 = EB(indx + i - 1, j, em::bx2); + const auto bx2_110 = EB(indx + i, j, em::bx2); + const auto bx2_210 = EB(indx + i + 1, j, em::bx2); + const auto bx2_020 = EB(indx + i - 1, j + 1, em::bx2); + const auto bx2_120 = EB(indx + i, j + 1, em::bx2); + const auto bx2_220 = EB(indx + i + 1, j + 1, em::bx2); + // clang-format on + + const auto bx2_0 = bx2_000 * w0dx + bx2_100 * w1dx + bx2_200 * w2dx; + const auto bx2_1 = bx2_010 * w0dx + bx2_110 * w1dx + bx2_210 * w2dx; + const auto bx2_2 = bx2_020 * w0dx + bx2_120 * w1dx + bx2_220 * w2dx; + b0[1] = bx2_0 * w0py + bx2_1 * w1py + bx2_2 * w2py; // Bx3 // Interpolate --- (dual, dual) - c000 = EB(i - 1, j - 1, em::bx3); - c100 = EB(i, j - 1, em::bx3); - c200 = EB(i + 1, j - 1, em::bx3); - c010 = EB(i - 1, j, em::bx3); - c110 = EB(i, j, em::bx3); - c210 = EB(i + 1, j, em::bx3); - c020 = EB(i - 1, j + 1, em::bx3); - c120 = EB(i, j + 1, em::bx3); - c220 = EB(i + 1, j + 1, em::bx3); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - b0[2] = c0 * w0y + c1 * w1y + c2 * w2y; + // clang-format off + const auto bx3_000 = EB(indx + i - 1, indy + j - 1, em::bx3); + const auto bx3_100 = EB(indx + i, indy + j - 1, em::bx3); + const auto bx3_200 = EB(indx + i + 1, indy + j - 1, em::bx3); + const auto bx3_010 = 
EB(indx + i - 1, indy + j, em::bx3); + const auto bx3_110 = EB(indx + i, indy + j, em::bx3); + const auto bx3_210 = EB(indx + i + 1, indy + j, em::bx3); + const auto bx3_020 = EB(indx + i - 1, indy + j + 1, em::bx3); + const auto bx3_120 = EB(indx + i, indy + j + 1, em::bx3); + const auto bx3_220 = EB(indx + i + 1, indy + j + 1, em::bx3); + // clang-format on + + const auto bx3_0 = bx3_000 * w0dx + bx3_100 * w1dx + bx3_200 * w2dx; + const auto bx3_1 = bx3_010 * w0dx + bx3_110 * w1dx + bx3_210 * w2dx; + const auto bx3_2 = bx3_020 * w0dx + bx3_120 * w1dx + bx3_220 * w2dx; + b0[2] = bx3_0 * w0dy + bx3_1 * w1dy + bx3_2 * w2dy; } else if constexpr (D == Dim::_3D) { const int i { i1(p) + static_cast(N_GHOSTS) }; @@ -1266,312 +1312,333 @@ namespace kernel::sr { const auto dx2_ { static_cast(dx2(p)) }; const auto dx3_ { static_cast(dx3(p)) }; - // Compute weights for second-order interpolation - real_t w0x = HALF * SQR(HALF - dx1_); - real_t w1x = static_cast(0.75) - SQR(dx1_); - real_t w2x = HALF * SQR(HALF + dx1_); - - real_t w0y = HALF * SQR(HALF - dx2_); - real_t w1y = static_cast(0.75) - SQR(dx2_); - real_t w2y = HALF * SQR(HALF + dx2_); + // direct interpolation of staggered grid + // primal = i, dual = i+ind + const int indx = static_cast(static_cast(dx1_ + HALF)); + const int indy = static_cast(static_cast(dx2_ + HALF)); + const int indz = static_cast(static_cast(dx3_ + HALF)); - real_t w0z = HALF * SQR(HALF - dx3_); - real_t w1z = static_cast(0.75) - SQR(dx3_); - real_t w2z = HALF * SQR(HALF + dx3_); + // Compute weights for second-order interpolation + // primal + const auto w0px = HALF * SQR(HALF - dx1_); + const auto w1px = static_cast(0.75) - SQR(dx1_); + const auto w2px = HALF * SQR(HALF + dx1_); + const auto w0py = HALF * SQR(HALF - dx2_); + const auto w1py = static_cast(0.75) - SQR(dx2_); + const auto w2py = HALF * SQR(HALF + dx2_); + const auto w0pz = HALF * SQR(HALF - dx3_); + const auto w1pz = static_cast(0.75) - SQR(dx3_); + const auto w2pz = HALF * 
SQR(HALF + dx3_); + // dual + const auto w0dx = HALF * SQR(HALF - dx1_); + const auto w1dx = static_cast(0.75) - SQR(dx1_); + const auto w2dx = HALF * SQR(HALF + dx1_); + const auto w0dy = HALF * SQR(HALF - dx2_); + const auto w1dy = static_cast(0.75) - SQR(dx2_); + const auto w2dy = HALF * SQR(HALF + dx2_); + const auto w0dz = HALF * SQR(HALF - dx3_); + const auto w1dz = static_cast(0.75) - SQR(dx3_); + const auto w2dz = HALF * SQR(HALF + dx3_); // Ex1 // Interpolate --- (dual, primal, primal) - real_t c000 = EB(i - 1, j - 1, k - 1, em::ex1); - real_t c100 = EB(i, j - 1, k - 1, em::ex1); - real_t c200 = EB(i + 1, j - 1, k - 1, em::ex1); - real_t c010 = EB(i - 1, j, k - 1, em::ex1); - real_t c110 = EB(i, j, k - 1, em::ex1); - real_t c210 = EB(i + 1, j, k - 1, em::ex1); - real_t c020 = EB(i - 1, j + 1, k - 1, em::ex1); - real_t c120 = EB(i, j + 1, k - 1, em::ex1); - real_t c220 = EB(i + 1, j + 1, k - 1, em::ex1); - - real_t c001 = EB(i - 1, j - 1, k, em::ex1); - real_t c101 = EB(i, j - 1, k, em::ex1); - real_t c201 = EB(i + 1, j - 1, k, em::ex1); - real_t c011 = EB(i - 1, j, k, em::ex1); - real_t c111 = EB(i, j, k, em::ex1); - real_t c211 = EB(i + 1, j, k, em::ex1); - real_t c021 = EB(i - 1, j + 1, k, em::ex1); - real_t c121 = EB(i, j + 1, k, em::ex1); - real_t c221 = EB(i + 1, j + 1, k, em::ex1); - - real_t c002 = EB(i - 1, j - 1, k + 1, em::ex1); - real_t c102 = EB(i, j - 1, k + 1, em::ex1); - real_t c202 = EB(i + 1, j - 1, k + 1, em::ex1); - real_t c012 = EB(i - 1, j, k + 1, em::ex1); - real_t c112 = EB(i, j, k + 1, em::ex1); - real_t c212 = EB(i + 1, j, k + 1, em::ex1); - real_t c022 = EB(i - 1, j + 1, k + 1, em::ex1); - real_t c122 = EB(i, j + 1, k + 1, em::ex1); - real_t c222 = EB(i + 1, j + 1, k + 1, em::ex1); - - real_t c0 = c000 * w0x + c100 * w1x + c200 * w2x; - real_t c1 = c010 * w0x + c110 * w1x + c210 * w2x; - real_t c2 = c020 * w0x + c120 * w1x + c220 * w2x; - real_t c00 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c001 * w0x + c101 * w1x + c201 * w2x; - 
c1 = c011 * w0x + c111 * w1x + c211 * w2x; - c2 = c021 * w0x + c121 * w1x + c221 * w2x; - real_t c01 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c002 * w0x + c102 * w1x + c202 * w2x; - c1 = c012 * w0x + c112 * w1x + c212 * w2x; - c2 = c022 * w0x + c122 * w1x + c222 * w2x; - real_t c02 = c0 * w0y + c1 * w1y + c2 * w2y; - - e0[0] = c00 * w0z + c01 * w1z + c02 * w2z; + // clang-format off + const auto ex1_000 = EB(indx + i - 1, j - 1, k - 1, em::ex1); + const auto ex1_100 = EB(indx + i, j - 1, k - 1, em::ex1); + const auto ex1_200 = EB(indx + i + 1, j - 1, k - 1, em::ex1); + const auto ex1_010 = EB(indx + i - 1, j, k - 1, em::ex1); + const auto ex1_110 = EB(indx + i, j, k - 1, em::ex1); + const auto ex1_210 = EB(indx + i + 1, j, k - 1, em::ex1); + const auto ex1_020 = EB(indx + i - 1, j + 1, k - 1, em::ex1); + const auto ex1_120 = EB(indx + i, j + 1, k - 1, em::ex1); + const auto ex1_220 = EB(indx + i + 1, j + 1, k - 1, em::ex1); + + const auto ex1_001 = EB(indx + i - 1, j - 1, k, em::ex1); + const auto ex1_101 = EB(indx + i, j - 1, k, em::ex1); + const auto ex1_201 = EB(indx + i + 1, j - 1, k, em::ex1); + const auto ex1_011 = EB(indx + i - 1, j, k, em::ex1); + const auto ex1_111 = EB(indx + i, j, k, em::ex1); + const auto ex1_211 = EB(indx + i + 1, j, k, em::ex1); + const auto ex1_021 = EB(indx + i - 1, j + 1, k, em::ex1); + const auto ex1_121 = EB(indx + i, j + 1, k, em::ex1); + const auto ex1_221 = EB(indx + i + 1, j + 1, k, em::ex1); + + const auto ex1_002 = EB(indx + i - 1, j - 1, k + 1, em::ex1); + const auto ex1_102 = EB(indx + i, j - 1, k + 1, em::ex1); + const auto ex1_202 = EB(indx + i + 1, j - 1, k + 1, em::ex1); + const auto ex1_012 = EB(indx + i - 1, j, k + 1, em::ex1); + const auto ex1_112 = EB(indx + i, j, k + 1, em::ex1); + const auto ex1_212 = EB(indx + i + 1, j, k + 1, em::ex1); + const auto ex1_022 = EB(indx + i - 1, j + 1, k + 1, em::ex1); + const auto ex1_122 = EB(indx + i, j + 1, k + 1, em::ex1); + const auto ex1_222 = EB(indx + i + 1, j + 1, k + 
1, em::ex1); + // clang-format on + + const auto ex1_0_0 = ex1_000 * w0dx + ex1_100 * w1dx + ex1_200 * w2dx; + const auto ex1_1_0 = ex1_010 * w0dx + ex1_110 * w1dx + ex1_210 * w2dx; + const auto ex1_2_0 = ex1_020 * w0dx + ex1_120 * w1dx + ex1_220 * w2dx; + const auto ex1_0_1 = ex1_001 * w0dx + ex1_101 * w1dx + ex1_201 * w2dx; + const auto ex1_1_1 = ex1_011 * w0dx + ex1_111 * w1dx + ex1_211 * w2dx; + const auto ex1_2_1 = ex1_021 * w0dx + ex1_121 * w1dx + ex1_221 * w2dx; + const auto ex1_0_2 = ex1_002 * w0dx + ex1_102 * w1dx + ex1_202 * w2dx; + const auto ex1_1_2 = ex1_012 * w0dx + ex1_112 * w1dx + ex1_212 * w2dx; + const auto ex1_2_2 = ex1_022 * w0dx + ex1_122 * w1dx + ex1_222 * w2dx; + + const auto ex1_00 = ex1_0_0 * w0py + ex1_1_0 * w1py + ex1_2_0 * w2py; + const auto ex1_01 = ex1_0_1 * w0py + ex1_1_1 * w1py + ex1_2_1 * w2py; + const auto ex1_02 = ex1_0_2 * w0py + ex1_1_2 * w1py + ex1_2_2 * w2py; + + e0[0] = ex1_00 * w0pz + ex1_01 * w1pz + ex1_02 * w2pz; // Ex2 // Interpolate -- (primal, dual, primal) - c000 = EB(i - 1, j - 1, k - 1, em::ex2); - c100 = EB(i, j - 1, k - 1, em::ex2); - c200 = EB(i + 1, j - 1, k - 1, em::ex2); - c010 = EB(i - 1, j, k - 1, em::ex2); - c110 = EB(i, j, k - 1, em::ex2); - c210 = EB(i + 1, j, k - 1, em::ex2); - c020 = EB(i - 1, j + 1, k - 1, em::ex2); - c120 = EB(i, j + 1, k - 1, em::ex2); - c220 = EB(i + 1, j + 1, k - 1, em::ex2); - - c001 = EB(i - 1, j - 1, k, em::ex2); - c101 = EB(i, j - 1, k, em::ex2); - c201 = EB(i + 1, j - 1, k, em::ex2); - c011 = EB(i - 1, j, k, em::ex2); - c111 = EB(i, j, k, em::ex2); - c211 = EB(i + 1, j, k, em::ex2); - c021 = EB(i - 1, j + 1, k, em::ex2); - c121 = EB(i, j + 1, k, em::ex2); - c221 = EB(i + 1, j + 1, k, em::ex2); - - c002 = EB(i - 1, j - 1, k + 1, em::ex2); - c102 = EB(i, j - 1, k + 1, em::ex2); - c202 = EB(i + 1, j - 1, k + 1, em::ex2); - c012 = EB(i - 1, j, k + 1, em::ex2); - c112 = EB(i, j, k + 1, em::ex2); - c212 = EB(i + 1, j, k + 1, em::ex2); - c022 = EB(i - 1, j + 1, k + 1, em::ex2); - c122 
= EB(i, j + 1, k + 1, em::ex2); - c222 = EB(i + 1, j + 1, k + 1, em::ex2); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - c00 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c001 * w0x + c101 * w1x + c201 * w2x; - c1 = c011 * w0x + c111 * w1x + c211 * w2x; - c2 = c021 * w0x + c121 * w1x + c221 * w2x; - c01 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c002 * w0x + c102 * w1x + c202 * w2x; - c1 = c012 * w0x + c112 * w1x + c212 * w2x; - c2 = c022 * w0x + c122 * w1x + c222 * w2x; - c02 = c0 * w0y + c1 * w1y + c2 * w2y; - - e0[1] = c00 * w0z + c01 * w1z + c02 * w2z; + // clang-format off + const auto ex2_000 = EB(i - 1, indy + j - 1, k - 1, em::ex2); + const auto ex2_100 = EB(i, indy + j - 1, k - 1, em::ex2); + const auto ex2_200 = EB(i + 1, indy + j - 1, k - 1, em::ex2); + const auto ex2_010 = EB(i - 1, indy + j, k - 1, em::ex2); + const auto ex2_110 = EB(i, indy + j, k - 1, em::ex2); + const auto ex2_210 = EB(i + 1, indy + j, k - 1, em::ex2); + const auto ex2_020 = EB(i - 1, indy + j + 1, k - 1, em::ex2); + const auto ex2_120 = EB(i, indy + j + 1, k - 1, em::ex2); + const auto ex2_220 = EB(i + 1, indy + j + 1, k - 1, em::ex2); + + const auto ex2_001 = EB(i - 1, indy + j - 1, k, em::ex2); + const auto ex2_101 = EB(i, indy + j - 1, k, em::ex2); + const auto ex2_201 = EB(i + 1, indy + j - 1, k, em::ex2); + const auto ex2_011 = EB(i - 1, indy + j, k, em::ex2); + const auto ex2_111 = EB(i, indy + j, k, em::ex2); + const auto ex2_211 = EB(i + 1, indy + j, k, em::ex2); + const auto ex2_021 = EB(i - 1, indy + j + 1, k, em::ex2); + const auto ex2_121 = EB(i, indy + j + 1, k, em::ex2); + const auto ex2_221 = EB(i + 1, indy + j + 1, k, em::ex2); + + const auto ex2_002 = EB(i - 1, indy + j - 1, k + 1, em::ex2); + const auto ex2_102 = EB(i, indy + j - 1, k + 1, em::ex2); + const auto ex2_202 = EB(i + 1, indy + j - 1, k + 1, em::ex2); + const auto ex2_012 = EB(i - 1, indy + j, k + 1, em::ex2); + 
const auto ex2_112 = EB(i, indy + j, k + 1, em::ex2); + const auto ex2_212 = EB(i + 1, indy + j, k + 1, em::ex2); + const auto ex2_022 = EB(i - 1, indy + j + 1, k + 1, em::ex2); + const auto ex2_122 = EB(i, indy + j + 1, k + 1, em::ex2); + const auto ex2_222 = EB(i + 1, indy + j + 1, k + 1, em::ex2); + // clang-format on + + const auto ex2_0_0 = ex2_000 * w0px + ex2_100 * w1px + ex2_200 * w2px; + const auto ex2_1_0 = ex2_010 * w0px + ex2_110 * w1px + ex2_210 * w2px; + const auto ex2_2_0 = ex2_020 * w0px + ex2_120 * w1px + ex2_220 * w2px; + const auto ex2_0_1 = ex2_001 * w0px + ex2_101 * w1px + ex2_201 * w2px; + const auto ex2_1_1 = ex2_011 * w0px + ex2_111 * w1px + ex2_211 * w2px; + const auto ex2_2_1 = ex2_021 * w0px + ex2_121 * w1px + ex2_221 * w2px; + const auto ex2_0_2 = ex2_002 * w0px + ex2_102 * w1px + ex2_202 * w2px; + const auto ex2_1_2 = ex2_012 * w0px + ex2_112 * w1px + ex2_212 * w2px; + const auto ex2_2_2 = ex2_022 * w0px + ex2_122 * w1px + ex2_222 * w2px; + + const auto ex2_00 = ex2_0_0 * w0dy + ex2_1_0 * w1dy + ex2_2_0 * w2dy; + const auto ex2_01 = ex2_0_1 * w0dy + ex2_1_1 * w1dy + ex2_2_1 * w2dy; + const auto ex2_02 = ex2_0_2 * w0dy + ex2_1_2 * w1dy + ex2_2_2 * w2dy; + + e0[1] = ex2_00 * w0pz + ex2_01 * w1pz + ex2_02 * w2pz; // Ex3 // Interpolate -- (primal, primal, dual) - c000 = EB(i - 1, j - 1, k - 1, em::ex3); - c100 = EB(i, j - 1, k - 1, em::ex3); - c200 = EB(i + 1, j - 1, k - 1, em::ex3); - c010 = EB(i - 1, j, k - 1, em::ex3); - c110 = EB(i, j, k - 1, em::ex3); - c210 = EB(i + 1, j, k - 1, em::ex3); - c020 = EB(i - 1, j + 1, k - 1, em::ex3); - c120 = EB(i, j + 1, k - 1, em::ex3); - c220 = EB(i + 1, j + 1, k - 1, em::ex3); - - c001 = EB(i - 1, j - 1, k, em::ex3); - c101 = EB(i, j - 1, k, em::ex3); - c201 = EB(i + 1, j - 1, k, em::ex3); - c011 = EB(i - 1, j, k, em::ex3); - c111 = EB(i, j, k, em::ex3); - c211 = EB(i + 1, j, k, em::ex3); - c021 = EB(i - 1, j + 1, k, em::ex3); - c121 = EB(i, j + 1, k, em::ex3); - c221 = EB(i + 1, j + 1, k, em::ex3);
- - c002 = EB(i - 1, j - 1, k + 1, em::ex3); - c102 = EB(i, j - 1, k + 1, em::ex3); - c202 = EB(i + 1, j - 1, k + 1, em::ex3); - c012 = EB(i - 1, j, k + 1, em::ex3); - c112 = EB(i, j, k + 1, em::ex3); - c212 = EB(i + 1, j, k + 1, em::ex3); - c022 = EB(i - 1, j + 1, k + 1, em::ex3); - c122 = EB(i, j + 1, k + 1, em::ex3); - c222 = EB(i + 1, j + 1, k + 1, em::ex3); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - c00 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c001 * w0x + c101 * w1x + c201 * w2x; - c1 = c011 * w0x + c111 * w1x + c211 * w2x; - c2 = c021 * w0x + c121 * w1x + c221 * w2x; - c01 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c002 * w0x + c102 * w1x + c202 * w2x; - c1 = c012 * w0x + c112 * w1x + c212 * w2x; - c2 = c022 * w0x + c122 * w1x + c222 * w2x; - c02 = c0 * w0y + c1 * w1y + c2 * w2y; - - e0[2] = c00 * w0z + c01 * w1z + c02 * w2z; + // clang-format off + const auto ex3_000 = EB(i - 1, j - 1, indz + k - 1, em::ex3); + const auto ex3_100 = EB(i, j - 1, indz + k - 1, em::ex3); + const auto ex3_200 = EB(i + 1, j - 1, indz + k - 1, em::ex3); + const auto ex3_010 = EB(i - 1, j, indz + k - 1, em::ex3); + const auto ex3_110 = EB(i, j, indz + k - 1, em::ex3); + const auto ex3_210 = EB(i + 1, j, indz + k - 1, em::ex3); + const auto ex3_020 = EB(i - 1, j + 1, indz + k - 1, em::ex3); + const auto ex3_120 = EB(i, j + 1, indz + k - 1, em::ex3); + const auto ex3_220 = EB(i + 1, j + 1, indz + k - 1, em::ex3); + + const auto ex3_001 = EB(i - 1, j - 1, indz + k, em::ex3); + const auto ex3_101 = EB(i, j - 1, indz + k, em::ex3); + const auto ex3_201 = EB(i + 1, j - 1, indz + k, em::ex3); + const auto ex3_011 = EB(i - 1, j, indz + k, em::ex3); + const auto ex3_111 = EB(i, j, indz + k, em::ex3); + const auto ex3_211 = EB(i + 1, j, indz + k, em::ex3); + const auto ex3_021 = EB(i - 1, j + 1, indz + k, em::ex3); + const auto ex3_121 = EB(i, j + 1, indz + k, em::ex3); + const auto ex3_221 = 
EB(i + 1, j + 1, indz + k, em::ex3); + + const auto ex3_002 = EB(i - 1, j - 1, indz + k + 1, em::ex3); + const auto ex3_102 = EB(i, j - 1, indz + k + 1, em::ex3); + const auto ex3_202 = EB(i + 1, j - 1, indz + k + 1, em::ex3); + const auto ex3_012 = EB(i - 1, j, indz + k + 1, em::ex3); + const auto ex3_112 = EB(i, j, indz + k + 1, em::ex3); + const auto ex3_212 = EB(i + 1, j, indz + k + 1, em::ex3); + const auto ex3_022 = EB(i - 1, j + 1, indz + k + 1, em::ex3); + const auto ex3_122 = EB(i, j + 1, indz + k + 1, em::ex3); + const auto ex3_222 = EB(i + 1, j + 1, indz + k + 1, em::ex3); + // clang-format on + + const auto ex3_0_0 = ex3_000 * w0px + ex3_100 * w1px + ex3_200 * w2px; + const auto ex3_1_0 = ex3_010 * w0px + ex3_110 * w1px + ex3_210 * w2px; + const auto ex3_2_0 = ex3_020 * w0px + ex3_120 * w1px + ex3_220 * w2px; + const auto ex3_0_1 = ex3_001 * w0px + ex3_101 * w1px + ex3_201 * w2px; + const auto ex3_1_1 = ex3_011 * w0px + ex3_111 * w1px + ex3_211 * w2px; + const auto ex3_2_1 = ex3_021 * w0px + ex3_121 * w1px + ex3_221 * w2px; + const auto ex3_0_2 = ex3_002 * w0px + ex3_102 * w1px + ex3_202 * w2px; + const auto ex3_1_2 = ex3_012 * w0px + ex3_112 * w1px + ex3_212 * w2px; + const auto ex3_2_2 = ex3_022 * w0px + ex3_122 * w1px + ex3_222 * w2px; + + const auto ex3_00 = ex3_0_0 * w0py + ex3_1_0 * w1py + ex3_2_0 * w2py; + const auto ex3_01 = ex3_0_1 * w0py + ex3_1_1 * w1py + ex3_2_1 * w2py; + const auto ex3_02 = ex3_0_2 * w0py + ex3_1_2 * w1py + ex3_2_2 * w2py; + + e0[2] = ex3_00 * w0dz + ex3_01 * w1dz + ex3_02 * w2dz; // Bx1 // Interpolate -- (primal, dual, dual) - c000 = EB(i - 1, j - 1, k - 1, em::bx1); - c100 = EB(i, j - 1, k - 1, em::bx1); - c200 = EB(i + 1, j - 1, k - 1, em::bx1); - c010 = EB(i - 1, j, k - 1, em::bx1); - c110 = EB(i, j, k - 1, em::bx1); - c210 = EB(i + 1, j, k - 1, em::bx1); - c020 = EB(i - 1, j + 1, k - 1, em::bx1); - c120 = EB(i, j + 1, k - 1, em::bx1); - c220 = EB(i + 1, j + 1, k - 1, em::bx1); - - c001 = EB(i - 1, j - 1, k, em::bx1); - 
c101 = EB(i, j - 1, k, em::bx1); - c201 = EB(i + 1, j - 1, k, em::bx1); - c011 = EB(i - 1, j, k, em::bx1); - c111 = EB(i, j, k, em::bx1); - c211 = EB(i + 1, j, k, em::bx1); - c021 = EB(i - 1, j + 1, k, em::bx1); - c121 = EB(i, j + 1, k, em::bx1); - c221 = EB(i + 1, j + 1, k, em::bx1); - - c002 = EB(i - 1, j - 1, k + 1, em::bx1); - c102 = EB(i, j - 1, k + 1, em::bx1); - c202 = EB(i + 1, j - 1, k + 1, em::bx1); - c012 = EB(i - 1, j, k + 1, em::bx1); - c112 = EB(i, j, k + 1, em::bx1); - c212 = EB(i + 1, j, k + 1, em::bx1); - c022 = EB(i - 1, j + 1, k + 1, em::bx1); - c122 = EB(i, j + 1, k + 1, em::bx1); - c222 = EB(i + 1, j + 1, k + 1, em::bx1); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - c00 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c001 * w0x + c101 * w1x + c201 * w2x; - c1 = c011 * w0x + c111 * w1x + c211 * w2x; - c2 = c021 * w0x + c121 * w1x + c221 * w2x; - c01 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c002 * w0x + c102 * w1x + c202 * w2x; - c1 = c012 * w0x + c112 * w1x + c212 * w2x; - c2 = c022 * w0x + c122 * w1x + c222 * w2x; - c02 = c0 * w0y + c1 * w1y + c2 * w2y; - - b0[0] = c00 * w0z + c01 * w1z + c02 * w2z; + // clang-format off + const auto bx1_000 = EB(i - 1, indy + j - 1, indz + k - 1, em::bx1); + const auto bx1_100 = EB(i, indy + j - 1, indz + k - 1, em::bx1); + const auto bx1_200 = EB(i + 1, indy + j - 1, indz + k - 1, em::bx1); + const auto bx1_010 = EB(i - 1, indy + j, indz + k - 1, em::bx1); + const auto bx1_110 = EB(i, indy + j, indz + k - 1, em::bx1); + const auto bx1_210 = EB(i + 1, indy + j, indz + k - 1, em::bx1); + const auto bx1_020 = EB(i - 1, indy + j + 1, indz + k - 1, em::bx1); + const auto bx1_120 = EB(i, indy + j + 1, indz + k - 1, em::bx1); + const auto bx1_220 = EB(i + 1, indy + j + 1, indz + k - 1, em::bx1); + + const auto bx1_001 = EB(i - 1, indy + j - 1, indz + k, em::bx1); + const auto bx1_101 = EB(i, indy + j - 1, indz + k, em::bx1); + 
const auto bx1_201 = EB(i + 1, indy + j - 1, indz + k, em::bx1); + const auto bx1_011 = EB(i - 1, indy + j, indz + k, em::bx1); + const auto bx1_111 = EB(i, indy + j, indz + k, em::bx1); + const auto bx1_211 = EB(i + 1, indy + j, indz + k, em::bx1); + const auto bx1_021 = EB(i - 1, indy + j + 1, indz + k, em::bx1); + const auto bx1_121 = EB(i, indy + j + 1, indz + k, em::bx1); + const auto bx1_221 = EB(i + 1, indy + j + 1, indz + k, em::bx1); + + const auto bx1_002 = EB(i - 1, indy + j - 1, indz + k + 1, em::bx1); + const auto bx1_102 = EB(i, indy + j - 1, indz + k + 1, em::bx1); + const auto bx1_202 = EB(i + 1, indy + j - 1, indz + k + 1, em::bx1); + const auto bx1_012 = EB(i - 1, indy + j, indz + k + 1, em::bx1); + const auto bx1_112 = EB(i, indy + j, indz + k + 1, em::bx1); + const auto bx1_212 = EB(i + 1, indy + j, indz + k + 1, em::bx1); + const auto bx1_022 = EB(i - 1, indy + j + 1, indz + k + 1, em::bx1); + const auto bx1_122 = EB(i, indy + j + 1, indz + k + 1, em::bx1); + const auto bx1_222 = EB(i + 1, indy + j + 1, indz + k + 1, em::bx1); + // clang-format on + + const auto bx1_0_0 = bx1_000 * w0px + bx1_100 * w1px + bx1_200 * w2px; + const auto bx1_1_0 = bx1_010 * w0px + bx1_110 * w1px + bx1_210 * w2px; + const auto bx1_2_0 = bx1_020 * w0px + bx1_120 * w1px + bx1_220 * w2px; + const auto bx1_0_1 = bx1_001 * w0px + bx1_101 * w1px + bx1_201 * w2px; + const auto bx1_1_1 = bx1_011 * w0px + bx1_111 * w1px + bx1_211 * w2px; + const auto bx1_2_1 = bx1_021 * w0px + bx1_121 * w1px + bx1_221 * w2px; + const auto bx1_0_2 = bx1_002 * w0px + bx1_102 * w1px + bx1_202 * w2px; + const auto bx1_1_2 = bx1_012 * w0px + bx1_112 * w1px + bx1_212 * w2px; + const auto bx1_2_2 = bx1_022 * w0px + bx1_122 * w1px + bx1_222 * w2px; + + const auto bx1_00 = bx1_0_0 * w0dy + bx1_1_0 * w1dy + bx1_2_0 * w2dy; + const auto bx1_01 = bx1_0_1 * w0dy + bx1_1_1 * w1dy + bx1_2_1 * w2dy; + const auto bx1_02 = bx1_0_2 * w0dy + bx1_1_2 * w1dy + bx1_2_2 * w2dy; + + b0[0] = bx1_00 * w0dz + bx1_01 * 
w1dz + bx1_02 * w2dz; // Bx2 // Interpolate -- (dual, primal, dual) - c000 = EB(i - 1, j - 1, k - 1, em::bx2); - c100 = EB(i, j - 1, k - 1, em::bx2); - c200 = EB(i + 1, j - 1, k - 1, em::bx2); - c010 = EB(i - 1, j, k - 1, em::bx2); - c110 = EB(i, j, k - 1, em::bx2); - c210 = EB(i + 1, j, k - 1, em::bx2); - c020 = EB(i - 1, j + 1, k - 1, em::bx2); - c120 = EB(i, j + 1, k - 1, em::bx2); - c220 = EB(i + 1, j + 1, k - 1, em::bx2); - - c001 = EB(i - 1, j - 1, k, em::bx2); - c101 = EB(i, j - 1, k, em::bx2); - c201 = EB(i + 1, j - 1, k, em::bx2); - c011 = EB(i - 1, j, k, em::bx2); - c111 = EB(i, j, k, em::bx2); - c211 = EB(i + 1, j, k, em::bx2); - c021 = EB(i - 1, j + 1, k, em::bx2); - c121 = EB(i, j + 1, k, em::bx2); - c221 = EB(i + 1, j + 1, k, em::bx2); - - c002 = EB(i - 1, j - 1, k + 1, em::bx2); - c102 = EB(i, j - 1, k + 1, em::bx2); - c202 = EB(i + 1, j - 1, k + 1, em::bx2); - c012 = EB(i - 1, j, k + 1, em::bx2); - c112 = EB(i, j, k + 1, em::bx2); - c212 = EB(i + 1, j, k + 1, em::bx2); - c022 = EB(i - 1, j + 1, k + 1, em::bx2); - c122 = EB(i, j + 1, k + 1, em::bx2); - c222 = EB(i + 1, j + 1, k + 1, em::bx2); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - c00 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c001 * w0x + c101 * w1x + c201 * w2x; - c1 = c011 * w0x + c111 * w1x + c211 * w2x; - c2 = c021 * w0x + c121 * w1x + c221 * w2x; - c01 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c002 * w0x + c102 * w1x + c202 * w2x; - c1 = c012 * w0x + c112 * w1x + c212 * w2x; - c2 = c022 * w0x + c122 * w1x + c222 * w2x; - c02 = c0 * w0y + c1 * w1y + c2 * w2y; - - b0[1] = c00 * w0z + c01 * w1z + c02 * w2z; + // clang-format off + const auto bx2_000 = EB(indx + i - 1, j - 1, indz + k - 1, em::bx2); + const auto bx2_100 = EB(indx + i, j - 1, indz + k - 1, em::bx2); + const auto bx2_200 = EB(indx + i + 1, j - 1, indz + k - 1, em::bx2); + const auto bx2_010 = EB(indx + i - 1, j, indz + k - 1, 
em::bx2); + const auto bx2_110 = EB(indx + i, j, indz + k - 1, em::bx2); + const auto bx2_210 = EB(indx + i + 1, j, indz + k - 1, em::bx2); + const auto bx2_020 = EB(indx + i - 1, j + 1, indz + k - 1, em::bx2); + const auto bx2_120 = EB(indx + i, j + 1, indz + k - 1, em::bx2); + const auto bx2_220 = EB(indx + i + 1, j + 1, indz + k - 1, em::bx2); + + const auto bx2_001 = EB(indx + i - 1, j - 1, indz + k, em::bx2); + const auto bx2_101 = EB(indx + i, j - 1, indz + k, em::bx2); + const auto bx2_201 = EB(indx + i + 1, j - 1, indz + k, em::bx2); + const auto bx2_011 = EB(indx + i - 1, j, indz + k, em::bx2); + const auto bx2_111 = EB(indx + i, j, indz + k, em::bx2); + const auto bx2_211 = EB(indx + i + 1, j, indz + k, em::bx2); + const auto bx2_021 = EB(indx + i - 1, j + 1, indz + k, em::bx2); + const auto bx2_121 = EB(indx + i, j + 1, indz + k, em::bx2); + const auto bx2_221 = EB(indx + i + 1, j + 1, indz + k, em::bx2); + + const auto bx2_002 = EB(indx + i - 1, j - 1, indz + k + 1, em::bx2); + const auto bx2_102 = EB(indx + i, j - 1, indz + k + 1, em::bx2); + const auto bx2_202 = EB(indx + i + 1, j - 1, indz + k + 1, em::bx2); + const auto bx2_012 = EB(indx + i - 1, j, indz + k + 1, em::bx2); + const auto bx2_112 = EB(indx + i, j, indz + k + 1, em::bx2); + const auto bx2_212 = EB(indx + i + 1, j, indz + k + 1, em::bx2); + const auto bx2_022 = EB(indx + i - 1, j + 1, indz + k + 1, em::bx2); + const auto bx2_122 = EB(indx + i, j + 1, indz + k + 1, em::bx2); + const auto bx2_222 = EB(indx + i + 1, j + 1, indz + k + 1, em::bx2); + // clang-format on + + const auto bx2_0_0 = bx2_000 * w0dx + bx2_100 * w1dx + bx2_200 * w2dx; + const auto bx2_1_0 = bx2_010 * w0dx + bx2_110 * w1dx + bx2_210 * w2dx; + const auto bx2_2_0 = bx2_020 * w0dx + bx2_120 * w1dx + bx2_220 * w2dx; + const auto bx2_0_1 = bx2_001 * w0dx + bx2_101 * w1dx + bx2_201 * w2dx; + const auto bx2_1_1 = bx2_011 * w0dx + bx2_111 * w1dx + bx2_211 * w2dx; + const auto bx2_2_1 = bx2_021 * w0dx + bx2_121 * w1dx + bx2_221 
* w2dx; + const auto bx2_0_2 = bx2_002 * w0dx + bx2_102 * w1dx + bx2_202 * w2dx; + const auto bx2_1_2 = bx2_012 * w0dx + bx2_112 * w1dx + bx2_212 * w2dx; + const auto bx2_2_2 = bx2_022 * w0dx + bx2_122 * w1dx + bx2_222 * w2dx; + + const auto bx2_00 = bx2_0_0 * w0py + bx2_1_0 * w1py + bx2_2_0 * w2py; + const auto bx2_01 = bx2_0_1 * w0py + bx2_1_1 * w1py + bx2_2_1 * w2py; + const auto bx2_02 = bx2_0_2 * w0py + bx2_1_2 * w1py + bx2_2_2 * w2py; + + b0[1] = bx2_00 * w0dz + bx2_01 * w1dz + bx2_02 * w2dz; // Bx3 // Interpolate -- (dual, dual, primal) - c000 = EB(i - 1, j - 1, k - 1, em::bx3); - c100 = EB(i, j - 1, k - 1, em::bx3); - c200 = EB(i + 1, j - 1, k - 1, em::bx3); - c010 = EB(i - 1, j, k - 1, em::bx3); - c110 = EB(i, j, k - 1, em::bx3); - c210 = EB(i + 1, j, k - 1, em::bx3); - c020 = EB(i - 1, j + 1, k - 1, em::bx3); - c120 = EB(i, j + 1, k - 1, em::bx3); - c220 = EB(i + 1, j + 1, k - 1, em::bx3); - - c001 = EB(i - 1, j - 1, k, em::bx3); - c101 = EB(i, j - 1, k, em::bx3); - c201 = EB(i + 1, j - 1, k, em::bx3); - c011 = EB(i - 1, j, k, em::bx3); - c111 = EB(i, j, k, em::bx3); - c211 = EB(i + 1, j, k, em::bx3); - c021 = EB(i - 1, j + 1, k, em::bx3); - c121 = EB(i, j + 1, k, em::bx3); - c221 = EB(i + 1, j + 1, k, em::bx3); - - c002 = EB(i - 1, j - 1, k + 1, em::bx3); - c102 = EB(i, j - 1, k + 1, em::bx3); - c202 = EB(i + 1, j - 1, k + 1, em::bx3); - c012 = EB(i - 1, j, k + 1, em::bx3); - c112 = EB(i, j, k + 1, em::bx3); - c212 = EB(i + 1, j, k + 1, em::bx3); - c022 = EB(i - 1, j + 1, k + 1, em::bx3); - c122 = EB(i, j + 1, k + 1, em::bx3); - c222 = EB(i + 1, j + 1, k + 1, em::bx3); - - c0 = c000 * w0x + c100 * w1x + c200 * w2x; - c1 = c010 * w0x + c110 * w1x + c210 * w2x; - c2 = c020 * w0x + c120 * w1x + c220 * w2x; - c00 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c001 * w0x + c101 * w1x + c201 * w2x; - c1 = c011 * w0x + c111 * w1x + c211 * w2x; - c2 = c021 * w0x + c121 * w1x + c221 * w2x; - c01 = c0 * w0y + c1 * w1y + c2 * w2y; - - c0 = c002 * w0x + c102 * w1x + 
c202 * w2x; - c1 = c012 * w0x + c112 * w1x + c212 * w2x; - c2 = c022 * w0x + c122 * w1x + c222 * w2x; - c02 = c0 * w0y + c1 * w1y + c2 * w2y; - - b0[2] = c00 * w0z + c01 * w1z + c02 * w2z; + // clang-format off + const auto bx3_000 = EB(indx + i - 1, indy + j - 1, k - 1, em::bx3); + const auto bx3_100 = EB(indx + i, indy + j - 1, k - 1, em::bx3); + const auto bx3_200 = EB(indx + i + 1, indy + j - 1, k - 1, em::bx3); + const auto bx3_010 = EB(indx + i - 1, indy + j, k - 1, em::bx3); + const auto bx3_110 = EB(indx + i, indy + j, k - 1, em::bx3); + const auto bx3_210 = EB(indx + i + 1, indy + j, k - 1, em::bx3); + const auto bx3_020 = EB(indx + i - 1, indy + j + 1, k - 1, em::bx3); + const auto bx3_120 = EB(indx + i, indy + j + 1, k - 1, em::bx3); + const auto bx3_220 = EB(indx + i + 1, indy + j + 1, k - 1, em::bx3); + + const auto bx3_001 = EB(indx + i - 1, indy + j - 1, k, em::bx3); + const auto bx3_101 = EB(indx + i, indy + j - 1, k, em::bx3); + const auto bx3_201 = EB(indx + i + 1, indy + j - 1, k, em::bx3); + const auto bx3_011 = EB(indx + i - 1, indy + j, k, em::bx3); + const auto bx3_111 = EB(indx + i, indy + j, k, em::bx3); + const auto bx3_211 = EB(indx + i + 1, indy + j, k, em::bx3); + const auto bx3_021 = EB(indx + i - 1, indy + j + 1, k, em::bx3); + const auto bx3_121 = EB(indx + i, indy + j + 1, k, em::bx3); + const auto bx3_221 = EB(indx + i + 1, indy + j + 1, k, em::bx3); + + const auto bx3_002 = EB(indx + i - 1, indy + j - 1, k + 1, em::bx3); + const auto bx3_102 = EB(indx + i, indy + j - 1, k + 1, em::bx3); + const auto bx3_202 = EB(indx + i + 1, indy + j - 1, k + 1, em::bx3); + const auto bx3_012 = EB(indx + i - 1, indy + j, k + 1, em::bx3); + const auto bx3_112 = EB(indx + i, indy + j, k + 1, em::bx3); + const auto bx3_212 = EB(indx + i + 1, indy + j, k + 1, em::bx3); + const auto bx3_022 = EB(indx + i - 1, indy + j + 1, k + 1, em::bx3); + const auto bx3_122 = EB(indx + i, indy + j + 1, k + 1, em::bx3); + const auto bx3_222 = EB(indx + i + 1, indy + 
j + 1, k + 1, em::bx3); + // clang-format on + + const auto bx3_0_0 = bx3_000 * w0dx + bx3_100 * w1dx + bx3_200 * w2dx; + const auto bx3_1_0 = bx3_010 * w0dx + bx3_110 * w1dx + bx3_210 * w2dx; + const auto bx3_2_0 = bx3_020 * w0dx + bx3_120 * w1dx + bx3_220 * w2dx; + const auto bx3_0_1 = bx3_001 * w0dx + bx3_101 * w1dx + bx3_201 * w2dx; + const auto bx3_1_1 = bx3_011 * w0dx + bx3_111 * w1dx + bx3_211 * w2dx; + const auto bx3_2_1 = bx3_021 * w0dx + bx3_121 * w1dx + bx3_221 * w2dx; + const auto bx3_0_2 = bx3_002 * w0dx + bx3_102 * w1dx + bx3_202 * w2dx; + const auto bx3_1_2 = bx3_012 * w0dx + bx3_112 * w1dx + bx3_212 * w2dx; + const auto bx3_2_2 = bx3_022 * w0dx + bx3_122 * w1dx + bx3_222 * w2dx; + + const auto bx3_00 = bx3_0_0 * w0dy + bx3_1_0 * w1dy + bx3_2_0 * w2dy; + const auto bx3_01 = bx3_0_1 * w0dy + bx3_1_1 * w1dy + bx3_2_1 * w2dy; + const auto bx3_02 = bx3_0_2 * w0dy + bx3_1_2 * w1dy + bx3_2_2 * w2dy; + + b0[2] = bx3_00 * w0pz + bx3_01 * w1pz + bx3_02 * w2pz; } } From c6b9cc80b335f5e5fdf8dd7bf44c276fde672445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 27 May 2025 16:30:36 -0500 Subject: [PATCH 038/154] bugfixes --- src/kernels/particle_pusher_sr.hpp | 566 +++++++++++++++-------------- 1 file changed, 284 insertions(+), 282 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 7ccd06a80..c9dd83777 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -1105,53 +1105,54 @@ namespace kernel::sr { const auto dx1_center = static_cast(dx1_less_half) - dx1_; // direct interpolation of staggered grid - // primal = i, dual = i+ind - const int ind = static_cast(static_cast(dx1_ + HALF)); + // primal = i+ind, dual = i + const int indx = static_cast(static_cast(dx1_ + HALF)); // Compute weights for second-order interpolation // primal - const auto wp0 = HALF * SQR(HALF - dx1_); - const auto wp1 = static_cast(0.75) - SQR(dx1_); - const auto wp2 = HALF * 
SQR(HALF + dx1_); - // dual - ToDo! - const auto wd0 = HALF * SQR(HALF - dx1_); - const auto wd1 = static_cast(0.75) - SQR(dx1_); - const auto wd2 = HALF * SQR(HALF + dx1_); + const auto w0px = HALF * SQR(HALF + dx1_center); + const auto w1px = static_cast(0.75) - SQR(dx1_center); + const auto w2px = HALF * SQR(HALF - dx1_center); + + // dual + const auto w0dx = HALF * SQR(ONE - dx1_); + const auto w2dx = HALF * SQR(dx1_); + const auto w1dx = ONE - w0dx - w2dx; // Ex1 (dual grid) - const auto ex1_0 = EB(ind + i - 1, em::ex1); - const auto ex1_1 = EB(ind + i, em::ex1); - const auto ex1_2 = EB(ind + i + 1, em::ex1); + const auto ex1_0 = EB(i - 1, em::ex1); + const auto ex1_1 = EB(i, em::ex1); + const auto ex1_2 = EB(i + 1, em::ex1); e0[0] = ex1_0 * wd0 + ex1_1 * wd0 + ex1_2 * wd0; // Ex2 (primal grid) - const auto ex2_0 = EB(i - 1, em::ex2); - const auto ex2_1 = EB(i, em::ex2); - const auto ex2_2 = EB(i + 1, em::ex2); + const auto ex2_0 = EB(indx + i - 1, em::ex2); + const auto ex2_1 = EB(indx + i, em::ex2); + const auto ex2_2 = EB(indx + i + 1, em::ex2); e0[1] = ex2_0 * wp0 + ex2_1 * wp1 + ex2_2 * wp2; // Ex3 (primal grid) - const auto ex3_0 = EB(i - 1, em::ex3); - const auto ex3_1 = EB(i, em::ex3); // Second grid point - const auto ex3_2 = EB(i + 1, em::ex3); + const auto ex3_0 = EB(indx + i - 1, em::ex3); + const auto ex3_1 = EB(indx + i, em::ex3); + const auto ex3_2 = EB(indx + i + 1, em::ex3); e0[2] = ex3_0 * wp0 + ex3_1 * wp1 + ex3_2 * wp2; // Bx1 (primal grid) - const auto bx1_0 = EB(i - 1, em::bx1); - const auto bx1_1 = EB(i, em::bx1); // Second grid point - const auto bx1_2 = EB(i + 1, em::bx1); + const auto bx1_0 = EB(indx + i - 1, em::bx1); + const auto bx1_1 = EB(indx + i, em::bx1); + const auto bx1_2 = EB(indx + i + 1, em::bx1); b0[0] = bx1_0 * wp0 + bx1_1 * wp1 + bx1_2 * wp2; // Bx2 (dual grid) - const auto bx2_0 = EB(ind + i - 2, em::bx2); - const auto bx2_1 = EB(ind + i - 1, em::bx2); // Second grid point - const auto bx2_2 = EB(ind + i, em::bx2); + 
const auto bx2_0 = EB(i - 1, em::bx2); + const auto bx2_1 = EB(i, em::bx2); + const auto bx2_2 = EB(i + 1, em::bx2); b0[1] = bx2_0 * wd0 + bx2_1 * wd1 + bx2_2 * wd2; // Bx3 (dual grid) - const auto bx3_0 = EB(ind + i - 2, em::bx3); - const auto bx3_1 = EB(ind + i - 1, em::bx3); // Second grid point - const auto bx3_2 = EB(ind + i, em::bx3); + const auto bx3_0 = EB(i - 1, em::bx3); + const auto bx3_1 = EB(i, em::bx3); + const auto bx3_2 = EB(i + 1, em::bx3); b0[2] = bx3_0 * wd0 + bx3_1 * wd1 + bx3_2 * wd2; } else if constexpr (D == Dim::_2D) { @@ -1169,7 +1170,7 @@ namespace kernel::sr { const auto dx2_center = static_cast(dx2_less_half) - dx2_; // direct interpolation of staggered grid - // primal = i, dual = i+ind + // primal = i+ind, dual = i const int indx = static_cast(static_cast(dx1_ + HALF)); const int indy = static_cast(static_cast(dx2_ + HALF)); @@ -1182,26 +1183,26 @@ namespace kernel::sr { const auto w1py = static_cast(0.75) - SQR(dx2_center); const auto w2py = HALF * SQR(HALF - dx2_center); - // dual - ToDo! 
- const auto w0dx = HALF * SQR(HALF + dx1_center); - const auto w1dx = static_cast(0.75) - SQR(dx1_center); - const auto w2dx = HALF * SQR(HALF - dx1_center); - const auto w0dy = HALF * SQR(HALF + dx2_center); - const auto w1dy = static_cast(0.75) - SQR(dx2_center); - const auto w2dy = HALF * SQR(HALF - dx2_center); + // dual + const auto w0dx = HALF * SQR(ONE - dx1_); + const auto w2dx = HALF * SQR(dx1_); + const auto w1dx = ONE - w0dx - w2dx; + const auto w0dy = HALF * SQR(ONE - dx2_); + const auto w2dy = HALF * SQR(dx2_); + const auto w1dy = ONE - w0dx - w2dy; // Ex1 // Interpolate --- (dual, primal) // clang-format off - const auto ex1_000 = EB(indx + i - 1, j - 1, em::ex1); - const auto ex1_100 = EB(indx + i, j - 1, em::ex1); - const auto ex1_200 = EB(indx + i + 1, j - 1, em::ex1); - const auto ex1_010 = EB(indx + i - 1, j, em::ex1); - const auto ex1_110 = EB(indx + i, j, em::ex1); - const auto ex1_210 = EB(indx + i + 1, j, em::ex1); - const auto ex1_020 = EB(indx + i - 1, j + 1, em::ex1); - const auto ex1_120 = EB(indx + i, j + 1, em::ex1); - const auto ex1_220 = EB(indx + i + 1, j + 1, em::ex1); + const auto ex1_000 = EB(i - 1, indy + j - 1, em::ex1); + const auto ex1_100 = EB(i, indy + j - 1, em::ex1); + const auto ex1_200 = EB(i + 1, indy + j - 1, em::ex1); + const auto ex1_010 = EB(i - 1, indy + j, em::ex1); + const auto ex1_110 = EB(i, indy + j, em::ex1); + const auto ex1_210 = EB(i + 1, indy + j, em::ex1); + const auto ex1_020 = EB(i - 1, indy + j + 1, em::ex1); + const auto ex1_120 = EB(i, indy + j + 1, em::ex1); + const auto ex1_220 = EB(i + 1, indy + j + 1, em::ex1); // clang-format on const auto ex1_0 = ex1_000 * w0dx + ex1_100 * w1dx + ex1_200 * w2dx; @@ -1212,15 +1213,15 @@ namespace kernel::sr { // Ex2 // Interpolate --- (primal, dual) // clang-format off - const auto ex2_000 = EB(i - 1, indy + j - 1, em::ex2); - const auto ex2_100 = EB(i, indy + j - 1, em::ex2); - const auto ex2_200 = EB(i + 1, indy + j - 1, em::ex2); - const auto ex2_010 = EB(i 
- 1, indy + j, em::ex2); - const auto ex2_110 = EB(i, indy + j, em::ex2); - const auto ex2_210 = EB(i + 1, indy + j, em::ex2); - const auto ex2_020 = EB(i - 1, indy + j + 1, em::ex2); - const auto ex2_120 = EB(i, indy + j + 1, em::ex2); - const auto ex2_220 = EB(i + 1, indy + j + 1, em::ex2); + const auto ex2_000 = EB(indx + i - 1, j - 1, em::ex2); + const auto ex2_100 = EB(indx + i, j - 1, em::ex2); + const auto ex2_200 = EB(indx + i + 1, j - 1, em::ex2); + const auto ex2_010 = EB(indx + i - 1, j, em::ex2); + const auto ex2_110 = EB(indx + i, j, em::ex2); + const auto ex2_210 = EB(indx + i + 1, j, em::ex2); + const auto ex2_020 = EB(indx + i - 1, j + 1, em::ex2); + const auto ex2_120 = EB(indx + i, j + 1, em::ex2); + const auto ex2_220 = EB(indx + i + 1, j + 1, em::ex2); // clang-format on const auto ex2_0 = ex2_000 * w0px + ex2_100 * w1px + ex2_200 * w2px; @@ -1231,15 +1232,15 @@ namespace kernel::sr { // Ex3 // Interpolate --- (primal, primal) // clang-format off - const auto ex3_000 = EB(i - 1, j - 1, em::ex3); - const auto ex3_100 = EB(i, j - 1, em::ex3); - const auto ex3_200 = EB(i + 1, j - 1, em::ex3); - const auto ex3_010 = EB(i - 1, j, em::ex3); - const auto ex3_110 = EB(i, j, em::ex3); - const auto ex3_210 = EB(i + 1, j, em::ex3); - const auto ex3_020 = EB(i - 1, j + 1, em::ex3); - const auto ex3_120 = EB(i, j + 1, em::ex3); - const auto ex3_220 = EB(i + 1, j + 1, em::ex3); + const auto ex3_000 = EB(indx + i - 1, indy + j - 1, em::ex3); + const auto ex3_100 = EB(indx + i, indy + j - 1, em::ex3); + const auto ex3_200 = EB(indx + i + 1, indy + j - 1, em::ex3); + const auto ex3_010 = EB(indx + i - 1, indy + j, em::ex3); + const auto ex3_110 = EB(indx + i, indy + j, em::ex3); + const auto ex3_210 = EB(indx + i + 1, indy + j, em::ex3); + const auto ex3_020 = EB(indx + i - 1, indy + j + 1, em::ex3); + const auto ex3_120 = EB(indx + i, indy + j + 1, em::ex3); + const auto ex3_220 = EB(indx + i + 1, indy + j + 1, em::ex3); // clang-format on const auto ex3_0 = 
ex3_000 * w0px + ex3_100 * w1px + ex3_200 * w2px; @@ -1250,15 +1251,15 @@ namespace kernel::sr { // Bx1 // Interpolate --- (primal, dual) // clang-format off - const auto bx1_000 = EB(i - 1, indy + j - 1, em::bx1); - const auto bx1_100 = EB(i, indy + j - 1, em::bx1); - const auto bx1_200 = EB(i + 1, indy + j - 1, em::bx1); - const auto bx1_010 = EB(i - 1, indy + j, em::bx1); - const auto bx1_110 = EB(i, indy + j, em::bx1); - const auto bx1_210 = EB(i + 1, indy + j, em::bx1); - const auto bx1_020 = EB(i - 1, indy + j + 1, em::bx1); - const auto bx1_120 = EB(i, indy + j + 1, em::bx1); - const auto bx1_220 = EB(i + 1, indy + j + 1, em::bx1); + const auto bx1_000 = EB(indx + i - 1, indy + j - 1, em::bx1); + const auto bx1_100 = EB(indx + i, indy + j - 1, em::bx1); + const auto bx1_200 = EB(indx + i + 1, indy + j - 1, em::bx1); + const auto bx1_010 = EB(indx + i - 1, indy + j, em::bx1); + const auto bx1_110 = EB(indx + i, indy + j, em::bx1); + const auto bx1_210 = EB(indx + i + 1, indy + j, em::bx1); + const auto bx1_020 = EB(indx + i - 1, indy + j + 1, em::bx1); + const auto bx1_120 = EB(indx + i, indy + j + 1, em::bx1); + const auto bx1_220 = EB(indx + i + 1, indy + j + 1, em::bx1); // clang-format on const auto bx1_0 = bx1_000 * w0px + bx1_100 * w1px + bx1_200 * w2px; @@ -1269,15 +1270,15 @@ namespace kernel::sr { // Bx2 // Interpolate --- (dual, primal) // clang-format off - const auto bx2_000 = EB(indx + i - 1, j - 1, em::bx2); - const auto bx2_100 = EB(indx + i, j - 1, em::bx2); - const auto bx2_200 = EB(indx + i + 1, j - 1, em::bx2); - const auto bx2_010 = EB(indx + i - 1, j, em::bx2); - const auto bx2_110 = EB(indx + i, j, em::bx2); - const auto bx2_210 = EB(indx + i + 1, j, em::bx2); - const auto bx2_020 = EB(indx + i - 1, j + 1, em::bx2); - const auto bx2_120 = EB(indx + i, j + 1, em::bx2); - const auto bx2_220 = EB(indx + i + 1, j + 1, em::bx2); + const auto bx2_000 = EB(i - 1, indy + j - 1, em::bx2); + const auto bx2_100 = EB(i, indy + j - 1, em::bx2); + 
const auto bx2_200 = EB(i + 1, indy + j - 1, em::bx2); + const auto bx2_010 = EB(i - 1, indy + j, em::bx2); + const auto bx2_110 = EB(i, indy + j, em::bx2); + const auto bx2_210 = EB(i + 1, indy + j, em::bx2); + const auto bx2_020 = EB(i - 1, indy + j + 1, em::bx2); + const auto bx2_120 = EB(i, indy + j + 1, em::bx2); + const auto bx2_220 = EB(i + 1, indy + j + 1, em::bx2); // clang-format on const auto bx2_0 = bx2_000 * w0dx + bx2_100 * w1dx + bx2_200 * w2dx; @@ -1288,15 +1289,15 @@ namespace kernel::sr { // Bx3 // Interpolate --- (dual, dual) // clang-format off - const auto bx3_000 = EB(indx + i - 1, indy + j - 1, em::bx3); - const auto bx3_100 = EB(indx + i, indy + j - 1, em::bx3); - const auto bx3_200 = EB(indx + i + 1, indy + j - 1, em::bx3); - const auto bx3_010 = EB(indx + i - 1, indy + j, em::bx3); - const auto bx3_110 = EB(indx + i, indy + j, em::bx3); - const auto bx3_210 = EB(indx + i + 1, indy + j, em::bx3); - const auto bx3_020 = EB(indx + i - 1, indy + j + 1, em::bx3); - const auto bx3_120 = EB(indx + i, indy + j + 1, em::bx3); - const auto bx3_220 = EB(indx + i + 1, indy + j + 1, em::bx3); + const auto bx3_000 = EB(i - 1, j - 1, em::bx3); + const auto bx3_100 = EB(i, j - 1, em::bx3); + const auto bx3_200 = EB(i + 1, j - 1, em::bx3); + const auto bx3_010 = EB(i - 1, j, em::bx3); + const auto bx3_110 = EB(i, j, em::bx3); + const auto bx3_210 = EB(i + 1, j, em::bx3); + const auto bx3_020 = EB(i - 1, j + 1, em::bx3); + const auto bx3_120 = EB(i, j + 1, em::bx3); + const auto bx3_220 = EB(i + 1, j + 1, em::bx3); // clang-format on const auto bx3_0 = bx3_000 * w0dx + bx3_100 * w1dx + bx3_200 * w2dx; @@ -1312,66 +1313,77 @@ namespace kernel::sr { const auto dx2_ { static_cast(dx2(p)) }; const auto dx3_ { static_cast(dx3(p)) }; + const int dx1_less_half = static_cast(dx1_ < + static_cast(0.5)); + const auto dx1_center = static_cast(dx1_less_half) - dx1_; + + const int dx2_less_half = static_cast(dx2_ < + static_cast(0.5)); + const auto dx2_center = 
static_cast(dx2_less_half) - dx2_; + + const int dx3_less_half = static_cast(dx3_ < + static_cast(0.5)); + const auto dx3_center = static_cast(dx3_less_half) - dx3_; + // direct interpolation of staggered grid - // primal = i, dual = i+ind + // primal = i+ind, dual = i const int indx = static_cast(static_cast(dx1_ + HALF)); const int indy = static_cast(static_cast(dx2_ + HALF)); const int indz = static_cast(static_cast(dx3_ + HALF)); // Compute weights for second-order interpolation // primal - const auto w0px = HALF * SQR(HALF - dx1_); - const auto w1px = static_cast(0.75) - SQR(dx1_); - const auto w2px = HALF * SQR(HALF + dx1_); - const auto w0py = HALF * SQR(HALF - dx2_); - const auto w1py = static_cast(0.75) - SQR(dx2_); - const auto w2py = HALF * SQR(HALF + dx2_); - const auto w0pz = HALF * SQR(HALF - dx3_); - const auto w1pz = static_cast(0.75) - SQR(dx3_); - const auto w2pz = HALF * SQR(HALF + dx3_); + const auto w0px = HALF * SQR(HALF + dx1_center); + const auto w1px = static_cast(0.75) - SQR(dx1_center); + const auto w2px = HALF * SQR(HALF - dx1_center); + const auto w0py = HALF * SQR(HALF + dx2_center); + const auto w1py = static_cast(0.75) - SQR(dx2_center); + const auto w2py = HALF * SQR(HALF - dx2_center); + const auto w0pz = HALF * SQR(HALF + dx3_center); + const auto w1pz = static_cast(0.75) - SQR(dx3_center); + const auto w2pz = HALF * SQR(HALF - dx3_center); + // dual - const auto w0dx = HALF * SQR(HALF - dx1_); - const auto w1dx = static_cast(0.75) - SQR(dx1_); - const auto w2dx = HALF * SQR(HALF + dx1_); - const auto w0dy = HALF * SQR(HALF - dx2_); - const auto w1dy = static_cast(0.75) - SQR(dx2_); - const auto w2dy = HALF * SQR(HALF + dx2_); - const auto w0dz = HALF * SQR(HALF - dx3_); - const auto w1dz = static_cast(0.75) - SQR(dx3_); - const auto w2dz = HALF * SQR(HALF + dx3_); + const auto w0dx = HALF * SQR(ONE - dx1_); + const auto w2dx = HALF * SQR(dx1_); + const auto w1dx = ONE - w0dx - w2dx; + const auto w0dy = HALF * SQR(ONE - dx2_); + 
const auto w2dy = HALF * SQR(dx2_); + const auto w1dy = ONE - w0dx - w2dy; + const auto w0dz = HALF * SQR(ONE - dx3_); + const auto w2dz = HALF * SQR(dx3_); + const auto w1dz = ONE - w0dx - w2dy; // Ex1 // Interpolate --- (dual, primal, primal) // clang-format off - const auto ex1_000 = EB(indx + i - 1, j - 1, k - 1, em::ex1); - const auto ex1_100 = EB(indx + i, j - 1, k - 1, em::ex1); - const auto ex1_200 = EB(indx + i + 1, j - 1, k - 1, em::ex1); - const auto ex1_010 = EB(indx + i - 1, j, k - 1, em::ex1); - const auto ex1_110 = EB(indx + i, j, k - 1, em::ex1); - const auto ex1_210 = EB(indx + i + 1, j, k - 1, em::ex1); - const auto ex1_020 = EB(indx + i - 1, j + 1, k - 1, em::ex1); - const auto ex1_120 = EB(indx + i, j + 1, k - 1, em::ex1); - const auto ex1_220 = EB(indx + i + 1, j + 1, k - 1, em::ex1); - - const auto ex1_001 = EB(indx + i - 1, j - 1, k, em::ex1); - const auto ex1_101 = EB(indx + i, j - 1, k, em::ex1); - const auto ex1_201 = EB(indx + i + 1, j - 1, k, em::ex1); - const auto ex1_011 = EB(indx + i - 1, j, k, em::ex1); - const auto ex1_111 = EB(indx + i, j, k, em::ex1); - const auto ex1_211 = EB(indx + i + 1, j, k, em::ex1); - const auto ex1_021 = EB(indx + i - 1, j + 1, k, em::ex1); - const auto ex1_121 = EB(indx + i, j + 1, k, em::ex1); - const auto ex1_221 = EB(indx + i + 1, j + 1, k, em::ex1); - - const auto ex1_002 = EB(indx + i - 1, j - 1, k + 1, em::ex1); - const auto ex1_102 = EB(indx + i, j - 1, k + 1, em::ex1); - const auto ex1_202 = EB(indx + i + 1, j - 1, k + 1, em::ex1); - const auto ex1_012 = EB(indx + i - 1, j, k + 1, em::ex1); - const auto ex1_112 = EB(indx + i, j, k + 1, em::ex1); - const auto ex1_212 = EB(indx + i + 1, j, k + 1, em::ex1); - const auto ex1_022 = EB(indx + i - 1, j + 1, k + 1, em::ex1); - const auto ex1_122 = EB(indx + i, j + 1, k + 1, em::ex1); - const auto ex1_222 = EB(indx + i + 1, j + 1, k + 1, em::ex1); + const auto ex1_000 = EB(i - 1, indy + j - 1, indz + k - 1, em::ex1); + const auto ex1_100 = EB(i, indy + j - 
1, indz + k - 1, em::ex1); + const auto ex1_200 = EB(i + 1, indy + j - 1, indz + k - 1, em::ex1); + const auto ex1_010 = EB(i - 1, indy + j, indz + k - 1, em::ex1); + const auto ex1_110 = EB(i, indy + j, indz + k - 1, em::ex1); + const auto ex1_210 = EB(i + 1, indy + j, indz + k - 1, em::ex1); + const auto ex1_020 = EB(i - 1, indy + j + 1, indz + k - 1, em::ex1); + const auto ex1_120 = EB(i, indy + j + 1, indz + k - 1, em::ex1); + const auto ex1_220 = EB(i + 1, indy + j + 1, indz + k - 1, em::ex1); + const auto ex1_001 = EB(i - 1, indy + j - 1, indz + k, em::ex1); + const auto ex1_101 = EB(i, indy + j - 1, indz + k, em::ex1); + const auto ex1_201 = EB(i + 1, indy + j - 1, indz + k, em::ex1); + const auto ex1_011 = EB(i - 1, indy + j, indz + k, em::ex1); + const auto ex1_111 = EB(i, indy + j, indz + k, em::ex1); + const auto ex1_211 = EB(i + 1, indy + j, indz + k, em::ex1); + const auto ex1_021 = EB(i - 1, indy + j + 1, indz + k, em::ex1); + const auto ex1_121 = EB(i, indy + j + 1, indz + k, em::ex1); + const auto ex1_221 = EB(i + 1, indy + j + 1, indz + k, em::ex1); + const auto ex1_002 = EB(i - 1, indy + j - 1, indz + k + 1, em::ex1); + const auto ex1_102 = EB(i, indy + j - 1, indz + k + 1, em::ex1); + const auto ex1_202 = EB(i + 1, indy + j - 1, indz + k + 1, em::ex1); + const auto ex1_012 = EB(i - 1, indy + j, indz + k + 1, em::ex1); + const auto ex1_112 = EB(i, indy + j, indz + k + 1, em::ex1); + const auto ex1_212 = EB(i + 1, indy + j, indz + k + 1, em::ex1); + const auto ex1_022 = EB(i - 1, indy + j + 1, indz + k + 1, em::ex1); + const auto ex1_122 = EB(i, indy + j + 1, indz + k + 1, em::ex1); + const auto ex1_222 = EB(i + 1, indy + j + 1, indz + k + 1, em::ex1); // clang-format on const auto ex1_0_0 = ex1_000 * w0dx + ex1_100 * w1dx + ex1_200 * w2dx; @@ -1393,35 +1405,33 @@ namespace kernel::sr { // Ex2 // Interpolate -- (primal, dual, primal) // clang-format off - const auto ex2_000 = EB(i - 1, indy + j - 1, k - 1, em::ex2); - const auto ex2_100 = EB(i, 
indy + j - 1, k - 1, em::ex2); - const auto ex2_200 = EB(i + 1, indy + j - 1, k - 1, em::ex2); - const auto ex2_010 = EB(i - 1, indy + j, k - 1, em::ex2); - const auto ex2_110 = EB(i, indy + j, k - 1, em::ex2); - const auto ex2_210 = EB(i + 1, indy + j, k - 1, em::ex2); - const auto ex2_020 = EB(i - 1, indy + j + 1, k - 1, em::ex2); - const auto ex2_120 = EB(i, indy + j + 1, k - 1, em::ex2); - const auto ex2_220 = EB(i + 1, indy + j + 1, k - 1, em::ex2); - - const auto ex2_001 = EB(i - 1, indy + j - 1, k, em::ex2); - const auto ex2_101 = EB(i, indy + j - 1, k, em::ex2); - const auto ex2_201 = EB(i + 1, indy + j - 1, k, em::ex2); - const auto ex2_011 = EB(i - 1, indy + j, k, em::ex2); - const auto ex2_111 = EB(i, indy + j, k, em::ex2); - const auto ex2_211 = EB(i + 1, indy + j, k, em::ex2); - const auto ex2_021 = EB(i - 1, indy + j + 1, k, em::ex2); - const auto ex2_121 = EB(i, indy + j + 1, k, em::ex2); - const auto ex2_221 = EB(i + 1, indy + j + 1, k, em::ex2); - - const auto ex2_002 = EB(i - 1, indy + j - 1, k + 1, em::ex2); - const auto ex2_102 = EB(i, indy + j - 1, k + 1, em::ex2); - const auto ex2_202 = EB(i + 1, indy + j - 1, k + 1, em::ex2); - const auto ex2_012 = EB(i - 1, indy + j, k + 1, em::ex2); - const auto ex2_112 = EB(i, indy + j, k + 1, em::ex2); - const auto ex2_212 = EB(i + 1, indy + j, k + 1, em::ex2); - const auto ex2_022 = EB(i - 1, indy + j + 1, k + 1, em::ex2); - const auto ex2_122 = EB(i, indy + j + 1, k + 1, em::ex2); - const auto ex2_222 = EB(i + 1, indy + j + 1, k + 1, em::ex2); + const auto ex2_000 = EB(indx + i - 1, j - 1, indz + k - 1, em::ex2); + const auto ex2_100 = EB(indx + i, j - 1, indz + k - 1, em::ex2); + const auto ex2_200 = EB(indx + i + 1, j - 1, indz + k - 1, em::ex2); + const auto ex2_010 = EB(indx + i - 1, j, indz + k - 1, em::ex2); + const auto ex2_110 = EB(indx + i, j, indz + k - 1, em::ex2); + const auto ex2_210 = EB(indx + i + 1, j, indz + k - 1, em::ex2); + const auto ex2_020 = EB(indx + i - 1, j + 1, indz + k - 1, 
em::ex2); + const auto ex2_120 = EB(indx + i, j + 1, indz + k - 1, em::ex2); + const auto ex2_220 = EB(indx + i + 1, j + 1, indz + k - 1, em::ex2); + const auto ex2_001 = EB(indx + i - 1, j - 1, indz + k, em::ex2); + const auto ex2_101 = EB(indx + i, j - 1, indz + k, em::ex2); + const auto ex2_201 = EB(indx + i + 1, j - 1, indz + k, em::ex2); + const auto ex2_011 = EB(indx + i - 1, j, indz + k, em::ex2); + const auto ex2_111 = EB(indx + i, j, indz + k, em::ex2); + const auto ex2_211 = EB(indx + i + 1, j, indz + k, em::ex2); + const auto ex2_021 = EB(indx + i - 1, j + 1, indz + k, em::ex2); + const auto ex2_121 = EB(indx + i, j + 1, indz + k, em::ex2); + const auto ex2_221 = EB(indx + i + 1, j + 1, indz + k, em::ex2); + const auto ex2_002 = EB(indx + i - 1, j - 1, indz + k + 1, em::ex2); + const auto ex2_102 = EB(indx + i, j - 1, indz + k + 1, em::ex2); + const auto ex2_202 = EB(indx + i + 1, j - 1, indz + k + 1, em::ex2); + const auto ex2_012 = EB(indx + i - 1, j, indz + k + 1, em::ex2); + const auto ex2_112 = EB(indx + i, j, indz + k + 1, em::ex2); + const auto ex2_212 = EB(indx + i + 1, j, indz + k + 1, em::ex2); + const auto ex2_022 = EB(indx + i - 1, j + 1, indz + k + 1, em::ex2); + const auto ex2_122 = EB(indx + i, j + 1, indz + k + 1, em::ex2); + const auto ex2_222 = EB(indx + i + 1, j + 1, indz + k + 1, em::ex2); // clang-format on const auto ex2_0_0 = ex2_000 * w0px + ex2_100 * w1px + ex1_200 * w2px; @@ -1443,35 +1453,33 @@ namespace kernel::sr { // Ex3 // Interpolate -- (primal, primal, dual) // clang-format off - const auto ex3_000 = EB(i - 1, j - 1, indz + k - 1, em::ex3); - const auto ex3_100 = EB(i, j - 1, indz + k - 1, em::ex3); - const auto ex3_200 = EB(i + 1, j - 1, indz + k - 1, em::ex3); - const auto ex3_010 = EB(i - 1, j, indz + k - 1, em::ex3); - const auto ex3_110 = EB(i, j, indz + k - 1, em::ex3); - const auto ex3_210 = EB(i + 1, j, indz + k - 1, em::ex3); - const auto ex3_020 = EB(i - 1, j + 1, indz + k - 1, em::ex3); - const auto ex3_120 = 
EB(i, j + 1, indz + k - 1, em::ex3); - const auto ex3_220 = EB(i + 1, j + 1, indz + k - 1, em::ex3); - - const auto ex3_001 = EB(i - 1, j - 1, indz + k, em::ex3); - const auto ex3_101 = EB(i, j - 1, indz + k, em::ex3); - const auto ex3_201 = EB(i + 1, j - 1, indz + k, em::ex3); - const auto ex3_011 = EB(i - 1, j, indz + k, em::ex3); - const auto ex3_111 = EB(i, j, indz + k, em::ex3); - const auto ex3_211 = EB(i + 1, j, indz + k, em::ex3); - const auto ex3_021 = EB(i - 1, j + 1, indz + k, em::ex3); - const auto ex3_121 = EB(i, j + 1, indz + k, em::ex3); - const auto ex3_221 = EB(i + 1, j + 1, indz + k, em::ex3); - - const auto ex3_002 = EB(i - 1, j - 1, indz + k + 1, em::ex3); - const auto ex3_102 = EB(i, j - 1, indz + k + 1, em::ex3); - const auto ex3_202 = EB(i + 1, j - 1, indz + k + 1, em::ex3); - const auto ex3_012 = EB(i - 1, j, indz + k + 1, em::ex3); - const auto ex3_112 = EB(i, j, indz + k + 1, em::ex3); - const auto ex3_212 = EB(i + 1, j, indz + k + 1, em::ex3); - const auto ex3_022 = EB(i - 1, j + 1, indz + k + 1, em::ex3); - const auto ex3_122 = EB(i, j + 1, indz + k + 1, em::ex3); - const auto ex3_222 = EB(i + 1, j + 1, indz + k + 1, em::ex3); + const auto ex3_000 = EB(indx + i - 1, indy + j - 1, k - 1, em::ex3); + const auto ex3_100 = EB(indx + i, indy + j - 1, k - 1, em::ex3); + const auto ex3_200 = EB(indx + i + 1, indy + j - 1, k - 1, em::ex3); + const auto ex3_010 = EB(indx + i - 1, indy + j, k - 1, em::ex3); + const auto ex3_110 = EB(indx + i, indy + j, k - 1, em::ex3); + const auto ex3_210 = EB(indx + i + 1, indy + j, k - 1, em::ex3); + const auto ex3_020 = EB(indx + i - 1, indy + j + 1, k - 1, em::ex3); + const auto ex3_120 = EB(indx + i, indy + j + 1, k - 1, em::ex3); + const auto ex3_220 = EB(indx + i + 1, indy + j + 1, k - 1, em::ex3); + const auto ex3_001 = EB(indx + i - 1, indy + j - 1, k, em::ex3); + const auto ex3_101 = EB(indx + i, indy + j - 1, k, em::ex3); + const auto ex3_201 = EB(indx + i + 1, indy + j - 1, k, em::ex3); + const auto 
ex3_011 = EB(indx + i - 1, indy + j, k, em::ex3); + const auto ex3_111 = EB(indx + i, indy + j, k, em::ex3); + const auto ex3_211 = EB(indx + i + 1, indy + j, k, em::ex3); + const auto ex3_021 = EB(indx + i - 1, indy + j + 1, k, em::ex3); + const auto ex3_121 = EB(indx + i, indy + j + 1, k, em::ex3); + const auto ex3_221 = EB(indx + i + 1, indy + j + 1, k, em::ex3); + const auto ex3_002 = EB(indx + i - 1, indy + j - 1, k + 1, em::ex3); + const auto ex3_102 = EB(indx + i, indy + j - 1, k + 1, em::ex3); + const auto ex3_202 = EB(indx + i + 1, indy + j - 1, k + 1, em::ex3); + const auto ex3_012 = EB(indx + i - 1, indy + j, k + 1, em::ex3); + const auto ex3_112 = EB(indx + i, indy + j, k + 1, em::ex3); + const auto ex3_212 = EB(indx + i + 1, indy + j, k + 1, em::ex3); + const auto ex3_022 = EB(indx + i - 1, indy + j + 1, k + 1, em::ex3); + const auto ex3_122 = EB(indx + i, indy + j + 1, k + 1, em::ex3); + const auto ex3_222 = EB(indx + i + 1, indy + j + 1, k + 1, em::ex3); // clang-format on const auto ex3_0_0 = ex3_000 * w0px + ex3_100 * w1px + ex3_200 * w2px; @@ -1493,35 +1501,33 @@ namespace kernel::sr { // Bx1 // Interpolate -- (primal, dual, dual) // clang-format off - const auto bx1_000 = EB(i - 1, indy + j - 1, indz + k - 1, em::bx1); - const auto bx1_100 = EB(i, indy + j - 1, indz + k - 1, em::bx1); - const auto bx1_200 = EB(i + 1, indy + j - 1, indz + k - 1, em::bx1); - const auto bx1_010 = EB(i - 1, indy + j, indz + k - 1, em::bx1); - const auto bx1_110 = EB(i, indy + j, indz + k - 1, em::bx1); - const auto bx1_210 = EB(i + 1, indy + j, indz + k - 1, em::bx1); - const auto bx1_020 = EB(i - 1, indy + j + 1, indz + k - 1, em::bx1); - const auto bx1_120 = EB(i, indy + j + 1, indz + k - 1, em::bx1); - const auto bx1_220 = EB(i + 1, indy + j + 1, indz + k - 1, em::bx1); - - const auto bx1_001 = EB(i - 1, indy + j - 1, indz + k, em::bx1); - const auto bx1_101 = EB(i, indy + j - 1, indz + k, em::bx1); - const auto bx1_201 = EB(i + 1, indy + j - 1, indz + k, 
em::bx1); - const auto bx1_011 = EB(i - 1, indy + j, indz + k, em::bx1); - const auto bx1_111 = EB(i, indy + j, indz + k, em::bx1); - const auto bx1_211 = EB(i + 1, indy + j, indz + k, em::bx1); - const auto bx1_021 = EB(i - 1, indy + j + 1, indz + k, em::bx1); - const auto bx1_121 = EB(i, indy + j + 1, indz + k, em::bx1); - const auto bx1_221 = EB(i + 1, indy + j + 1, indz + k, em::bx1); - - const auto bx1_002 = EB(i - 1, indy + j - 1, indz + k + 1, em::bx1); - const auto bx1_102 = EB(i, indy + j - 1, indz + k + 1, em::bx1); - const auto bx1_202 = EB(i + 1, indy + j - 1, indz + k + 1, em::bx1); - const auto bx1_012 = EB(i - 1, indy + j, indz + k + 1, em::bx1); - const auto bx1_112 = EB(i, indy + j, indz + k + 1, em::bx1); - const auto bx1_212 = EB(i + 1, indy + j, indz + k + 1, em::bx1); - const auto bx1_022 = EB(i - 1, indy + j + 1, indz + k + 1, em::bx1); - const auto bx1_122 = EB(i, indy + j + 1, indz + k + 1, em::bx1); - const auto bx1_222 = EB(i + 1, indy + j + 1, indz + k + 1, em::bx1); + const auto bx1_000 = EB(indx + i - 1, j - 1, k - 1, em::bx1); + const auto bx1_100 = EB(indx + i, j - 1, k - 1, em::bx1); + const auto bx1_200 = EB(indx + i + 1, j - 1, k - 1, em::bx1); + const auto bx1_010 = EB(indx + i - 1, j, k - 1, em::bx1); + const auto bx1_110 = EB(indx + i, j, k - 1, em::bx1); + const auto bx1_210 = EB(indx + i + 1, j, k - 1, em::bx1); + const auto bx1_020 = EB(indx + i - 1, j + 1, k - 1, em::bx1); + const auto bx1_120 = EB(indx + i, j + 1, k - 1, em::bx1); + const auto bx1_220 = EB(indx + i + 1, j + 1, k - 1, em::bx1); + const auto bx1_001 = EB(indx + i - 1, j - 1, k, em::bx1); + const auto bx1_101 = EB(indx + i, j - 1, k, em::bx1); + const auto bx1_201 = EB(indx + i + 1, j - 1, k, em::bx1); + const auto bx1_011 = EB(indx + i - 1, j, k, em::bx1); + const auto bx1_111 = EB(indx + i, j, k, em::bx1); + const auto bx1_211 = EB(indx + i + 1, j, k, em::bx1); + const auto bx1_021 = EB(indx + i - 1, j + 1, k, em::bx1); + const auto bx1_121 = EB(indx + i, j 
+ 1, k, em::bx1); + const auto bx1_221 = EB(indx + i + 1, j + 1, k, em::bx1); + const auto bx1_002 = EB(indx + i - 1, j - 1, k + 1, em::bx1); + const auto bx1_102 = EB(indx + i, j - 1, k + 1, em::bx1); + const auto bx1_202 = EB(indx + i + 1, j - 1, k + 1, em::bx1); + const auto bx1_012 = EB(indx + i - 1, j, k + 1, em::bx1); + const auto bx1_112 = EB(indx + i, j, k + 1, em::bx1); + const auto bx1_212 = EB(indx + i + 1, j, k + 1, em::bx1); + const auto bx1_022 = EB(indx + i - 1, j + 1, k + 1, em::bx1); + const auto bx1_122 = EB(indx + i, j + 1, k + 1, em::bx1); + const auto bx1_222 = EB(indx + i + 1, j + 1, k + 1, em::bx1); // clang-format on const auto bx1_0_0 = bx1_000 * w0px + bx1_100 * w1px + bx1_200 * w2px; @@ -1543,35 +1549,33 @@ namespace kernel::sr { // Bx2 // Interpolate -- (dual, primal, dual) // clang-format off - const auto bx2_000 = EB(indx + i - 1, j - 1, indz + k - 1, em::bx2); - const auto bx2_100 = EB(indx + i, j - 1, indz + k - 1, em::bx2); - const auto bx2_200 = EB(indx + i + 1, j - 1, indz + k - 1, em::bx2); - const auto bx2_010 = EB(indx + i - 1, j, indz + k - 1, em::bx2); - const auto bx2_110 = EB(indx + i, j, indz + k - 1, em::bx2); - const auto bx2_210 = EB(indx + i + 1, j, indz + k - 1, em::bx2); - const auto bx2_020 = EB(indx + i - 1, j + 1, indz + k - 1, em::bx2); - const auto bx2_120 = EB(indx + i, j + 1, indz + k - 1, em::bx2); - const auto bx2_220 = EB(indx + i + 1, j + 1, indz + k - 1, em::bx2); - - const auto bx2_001 = EB(indx + i - 1, j - 1, indz + k, em::bx2); - const auto bx2_101 = EB(indx + i, j - 1, indz + k, em::bx2); - const auto bx2_201 = EB(indx + i + 1, j - 1, indz + k, em::bx2); - const auto bx2_011 = EB(indx + i - 1, j, indz + k, em::bx2); - const auto bx2_111 = EB(indx + i, j, indz + k, em::bx2); - const auto bx2_211 = EB(indx + i + 1, j, indz + k, em::bx2); - const auto bx2_021 = EB(indx + i - 1, j + 1, indz + k, em::bx2); - const auto bx2_121 = EB(indx + i, j + 1, indz + k, em::bx2); - const auto bx2_221 = EB(indx + i + 
1, j + 1, indz + k, em::bx2); - - const auto bx2_002 = EB(indx + i - 1, j - 1, indz + k + 1, em::bx2); - const auto bx2_102 = EB(indx + i, j - 1, indz + k + 1, em::bx2); - const auto bx2_202 = EB(indx + i + 1, j - 1, indz + k + 1, em::bx2); - const auto bx2_012 = EB(indx + i - 1, j, indz + k + 1, em::bx2); - const auto bx2_112 = EB(indx + i, j, indz + k + 1, em::bx2); - const auto bx2_212 = EB(indx + i + 1, j, indz + k + 1, em::bx2); - const auto bx2_022 = EB(indx + i - 1, j + 1, indz + k + 1, em::bx2); - const auto bx2_122 = EB(indx + i, j + 1, indz + k + 1, em::bx2); - const auto bx2_222 = EB(indx + i + 1, j + 1, indz + k + 1, em::bx2); + const auto bx2_000 = EB(i - 1, indy + j - 1, k - 1, em::bx2); + const auto bx2_100 = EB(i, indy + j - 1, k - 1, em::bx2); + const auto bx2_200 = EB(i + 1, indy + j - 1, k - 1, em::bx2); + const auto bx2_010 = EB(i - 1, indy + j, k - 1, em::bx2); + const auto bx2_110 = EB(i, indy + j, k - 1, em::bx2); + const auto bx2_210 = EB(i + 1, indy + j, k - 1, em::bx2); + const auto bx2_020 = EB(i - 1, indy + j + 1, k - 1, em::bx2); + const auto bx2_120 = EB(i, indy + j + 1, k - 1, em::bx2); + const auto bx2_220 = EB(i + 1, indy + j + 1, k - 1, em::bx2); + const auto bx2_001 = EB(i - 1, indy + j - 1, k, em::bx2); + const auto bx2_101 = EB(i, indy + j - 1, k, em::bx2); + const auto bx2_201 = EB(i + 1, indy + j - 1, k, em::bx2); + const auto bx2_011 = EB(i - 1, indy + j, k, em::bx2); + const auto bx2_111 = EB(i, indy + j, k, em::bx2); + const auto bx2_211 = EB(i + 1, indy + j, k, em::bx2); + const auto bx2_021 = EB(i - 1, indy + j + 1, k, em::bx2); + const auto bx2_121 = EB(i, indy + j + 1, k, em::bx2); + const auto bx2_221 = EB(i + 1, indy + j + 1, k, em::bx2); + const auto bx2_002 = EB(i - 1, indy + j - 1, k + 1, em::bx2); + const auto bx2_102 = EB(i, indy + j - 1, k + 1, em::bx2); + const auto bx2_202 = EB(i + 1, indy + j - 1, k + 1, em::bx2); + const auto bx2_012 = EB(i - 1, indy + j, k + 1, em::bx2); + const auto bx2_112 = EB(i, indy + 
j, k + 1, em::bx2); + const auto bx2_212 = EB(i + 1, indy + j, k + 1, em::bx2); + const auto bx2_022 = EB(i - 1, indy + j + 1, k + 1, em::bx2); + const auto bx2_122 = EB(i, indy + j + 1, k + 1, em::bx2); + const auto bx2_222 = EB(i + 1, indy + j + 1, k + 1, em::bx2); // clang-format on const auto bx2_0_0 = bx2_000 * w0dx + bx2_100 * w1dx + bx2_200 * w2dx; @@ -1593,35 +1597,33 @@ namespace kernel::sr { // Bx3 // Interpolate -- (dual, dual, primal) // clang-format off - const auto bx3_000 = EB(indx + i - 1, indy + j - 1, k - 1, em::bx3); - const auto bx3_100 = EB(indx + i, indy + j - 1, k - 1, em::bx3); - const auto bx3_200 = EB(indx + i + 1, indy + j - 1, k - 1, em::bx3); - const auto bx3_010 = EB(indx + i - 1, indy + j, k - 1, em::bx3); - const auto bx3_110 = EB(indx + i, indy + j, k - 1, em::bx3); - const auto bx3_210 = EB(indx + i + 1, indy + j, k - 1, em::bx3); - const auto bx3_020 = EB(indx + i - 1, indy + j + 1, k - 1, em::bx3); - const auto bx3_120 = EB(indx + i, indy + j + 1, k - 1, em::bx3); - const auto bx3_220 = EB(indx + i + 1, indy + j + 1, k - 1, em::bx3); - - const auto bx3_001 = EB(indx + i - 1, indy + j - 1, k, em::bx3); - const auto bx3_101 = EB(indx + i, indy + j - 1, k, em::bx3); - const auto bx3_201 = EB(indx + i + 1, indy + j - 1, k, em::bx3); - const auto bx3_011 = EB(indx + i - 1, indy + j, k, em::bx3); - const auto bx3_111 = EB(indx + i, indy + j, k, em::bx3); - const auto bx3_211 = EB(indx + i + 1, indy + j, k, em::bx3); - const auto bx3_021 = EB(indx + i - 1, indy + j + 1, k, em::bx3); - const auto bx3_121 = EB(indx + i, indy + j + 1, k, em::bx3); - const auto bx3_221 = EB(indx + i + 1, indy + j + 1, k, em::bx3); - - const auto bx3_002 = EB(indx + i - 1, indy + j - 1, k + 1, em::bx3); - const auto bx3_102 = EB(indx + i, indy + j - 1, k + 1, em::bx3); - const auto bx3_202 = EB(indx + i + 1, indy + j - 1, k + 1, em::bx3); - const auto bx3_012 = EB(indx + i - 1, indy + j, k + 1, em::bx3); - const auto bx3_112 = EB(indx + i, indy + j, k + 1, 
em::bx3); - const auto bx3_212 = EB(indx + i + 1, indy + j, k + 1, em::bx3); - const auto bx3_022 = EB(indx + i - 1, indy + j + 1, k + 1, em::bx3); - const auto bx3_122 = EB(indx + i, indy + j + 1, k + 1, em::bx3); - const auto bx3_222 = EB(indx + i + 1, indy + j + 1, k + 1, em::bx3); + const auto bx3_000 = EB(i - 1, j - 1, indz + k - 1, em::bx3); + const auto bx3_100 = EB(i, j - 1, indz + k - 1, em::bx3); + const auto bx3_200 = EB(i + 1, j - 1, indz + k - 1, em::bx3); + const auto bx3_010 = EB(i - 1, j, indz + k - 1, em::bx3); + const auto bx3_110 = EB(i, j, indz + k - 1, em::bx3); + const auto bx3_210 = EB(i + 1, j, indz + k - 1, em::bx3); + const auto bx3_020 = EB(i - 1, j + 1, indz + k - 1, em::bx3); + const auto bx3_120 = EB(i, j + 1, indz + k - 1, em::bx3); + const auto bx3_220 = EB(i + 1, j + 1, indz + k - 1, em::bx3); + const auto bx3_001 = EB(i - 1, j - 1, indz + k, em::bx3); + const auto bx3_101 = EB(i, j - 1, indz + k, em::bx3); + const auto bx3_201 = EB(i + 1, j - 1, indz + k, em::bx3); + const auto bx3_011 = EB(i - 1, j, indz + k, em::bx3); + const auto bx3_111 = EB(i, j, indz + k, em::bx3); + const auto bx3_211 = EB(i + 1, j, indz + k, em::bx3); + const auto bx3_021 = EB(i - 1, j + 1, indz + k, em::bx3); + const auto bx3_121 = EB(i, j + 1, indz + k, em::bx3); + const auto bx3_221 = EB(i + 1, j + 1, indz + k, em::bx3); + const auto bx3_002 = EB(i - 1, j - 1, indz + k + 1, em::bx3); + const auto bx3_102 = EB(i, j - 1, indz + k + 1, em::bx3); + const auto bx3_202 = EB(i + 1, j - 1, indz + k + 1, em::bx3); + const auto bx3_012 = EB(i - 1, j, indz + k + 1, em::bx3); + const auto bx3_112 = EB(i, j, indz + k + 1, em::bx3); + const auto bx3_212 = EB(i + 1, j, indz + k + 1, em::bx3); + const auto bx3_022 = EB(i - 1, j + 1, indz + k + 1, em::bx3); + const auto bx3_122 = EB(i, j + 1, indz + k + 1, em::bx3); + const auto bx3_222 = EB(i + 1, j + 1, indz + k + 1, em::bx3); // clang-format on const auto bx3_0_0 = bx3_000 * w0dx + bx3_100 * w1dx + bx3_200 * w2dx; 
From 1d4b074f836870c1b56e3e3704030fc1471bde51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 27 May 2025 16:36:54 -0500 Subject: [PATCH 039/154] bugfixes for variable names --- src/kernels/particle_pusher_sr.hpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index c9dd83777..e6e7224d4 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -1110,50 +1110,50 @@ namespace kernel::sr { // Compute weights for second-order interpolation // primal - const auto w0px = HALF * SQR(HALF + dx1_center); - const auto w1px = static_cast(0.75) - SQR(dx1_center); - const auto w2px = HALF * SQR(HALF - dx1_center); + const auto w0p = HALF * SQR(HALF + dx1_center); + const auto w1p = static_cast(0.75) - SQR(dx1_center); + const auto w2p = HALF * SQR(HALF - dx1_center); // dual - const auto w0dx = HALF * SQR(ONE - dx1_); - const auto w2dx = HALF * SQR(dx1_); - const auto w1dx = ONE - w0dx - w2dx; + const auto w0d = HALF * SQR(ONE - dx1_); + const auto w2d = HALF * SQR(dx1_); + const auto w1d = ONE - w0d - w2d; // Ex1 (dual grid) const auto ex1_0 = EB(i - 1, em::ex1); const auto ex1_1 = EB(i, em::ex1); const auto ex1_2 = EB(i + 1, em::ex1); - e0[0] = ex1_0 * wd0 + ex1_1 * wd0 + ex1_2 * wd0; + e0[0] = ex1_0 * w0d + ex1_1 * w1d + ex1_2 * w2d; // Ex2 (primal grid) const auto ex2_0 = EB(indx + i - 1, em::ex2); const auto ex2_1 = EB(indx + i, em::ex2); const auto ex2_2 = EB(indx + i + 1, em::ex2); - e0[1] = ex2_0 * wp0 + ex2_1 * wp1 + ex2_2 * wp2; + e0[1] = ex2_0 * w0p + ex2_1 * w1p + ex2_2 * w2p; // Ex3 (primal grid) const auto ex3_0 = EB(indx + i - 1, em::ex3); const auto ex3_1 = EB(indx + i, em::ex3); const auto ex3_2 = EB(indx + i + 1, em::ex3); - e0[2] = ex3_0 * wp0 + ex3_1 * wp1 + ex3_2 * wp2; + e0[2] = ex3_0 * w0p + ex3_1 * w1p + ex3_2 * w2p; // Bx1 (primal grid) const auto bx1_0 = EB(indx + i - 1, 
em::bx1); const auto bx1_1 = EB(indx + i, em::bx1); const auto bx1_2 = EB(indx + i + 1, em::bx1); - b0[0] = bx1_0 * wp0 + bx1_1 * wp1 + bx1_2 * wp2; + b0[0] = bx1_0 * w0p + bx1_1 * w1p + bx1_2 * w2p; // Bx2 (dual grid) const auto bx2_0 = EB(i - 1, em::bx2); const auto bx2_1 = EB(i, em::bx2); const auto bx2_2 = EB(i + 1, em::bx2); - b0[1] = bx2_0 * wd0 + bx2_1 * wd1 + bx2_2 * wd2; + b0[1] = bx2_0 * w0d + bx2_1 * w1d + bx2_2 * w2d; // Bx3 (dual grid) const auto bx3_0 = EB(i - 1, em::bx3); const auto bx3_1 = EB(i, em::bx3); const auto bx3_2 = EB(i + 1, em::bx3); - b0[2] = bx3_0 * wd0 + bx3_1 * wd1 + bx3_2 * wd2; + b0[2] = bx3_0 * w0d + bx3_1 * w1d + bx3_2 * w2d; } else if constexpr (D == Dim::_2D) { const int i { i1(p) + static_cast(N_GHOSTS) }; From b551c10abdc19138aa9e76312b9f47680245810b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sat, 7 Jun 2025 15:27:38 -0500 Subject: [PATCH 040/154] bugfix --- src/kernels/particle_pusher_sr.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index e6e7224d4..a8984de20 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -1189,7 +1189,7 @@ namespace kernel::sr { const auto w1dx = ONE - w0dx - w2dx; const auto w0dy = HALF * SQR(ONE - dx2_); const auto w2dy = HALF * SQR(dx2_); - const auto w1dy = ONE - w0dx - w2dy; + const auto w1dy = ONE - w0dy - w2dy; // Ex1 // Interpolate --- (dual, primal) @@ -1349,10 +1349,10 @@ namespace kernel::sr { const auto w1dx = ONE - w0dx - w2dx; const auto w0dy = HALF * SQR(ONE - dx2_); const auto w2dy = HALF * SQR(dx2_); - const auto w1dy = ONE - w0dx - w2dy; + const auto w1dy = ONE - w0dy - w2dy; const auto w0dz = HALF * SQR(ONE - dx3_); const auto w2dz = HALF * SQR(dx3_); - const auto w1dz = ONE - w0dx - w2dy; + const auto w1dz = ONE - w0dz - w2dz; // Ex1 // Interpolate --- (dual, primal, primal) From a8aa8bbae81789fb94669c8611cb12483b418a78 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Sun, 8 Jun 2025 22:42:55 -0500 Subject: [PATCH 041/154] bugfix --- src/kernels/particle_pusher_sr.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index a8984de20..2d7569319 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -1251,15 +1251,15 @@ namespace kernel::sr { // Bx1 // Interpolate --- (primal, dual) // clang-format off - const auto bx1_000 = EB(indx + i - 1, indy + j - 1, em::bx1); - const auto bx1_100 = EB(indx + i, indy + j - 1, em::bx1); - const auto bx1_200 = EB(indx + i + 1, indy + j - 1, em::bx1); - const auto bx1_010 = EB(indx + i - 1, indy + j, em::bx1); - const auto bx1_110 = EB(indx + i, indy + j, em::bx1); - const auto bx1_210 = EB(indx + i + 1, indy + j, em::bx1); - const auto bx1_020 = EB(indx + i - 1, indy + j + 1, em::bx1); - const auto bx1_120 = EB(indx + i, indy + j + 1, em::bx1); - const auto bx1_220 = EB(indx + i + 1, indy + j + 1, em::bx1); + const auto bx1_000 = EB(indx + i - 1, j - 1, em::bx1); + const auto bx1_100 = EB(indx + i, j - 1, em::bx1); + const auto bx1_200 = EB(indx + i + 1, j - 1, em::bx1); + const auto bx1_010 = EB(indx + i - 1, j, em::bx1); + const auto bx1_110 = EB(indx + i, j, em::bx1); + const auto bx1_210 = EB(indx + i + 1, j, em::bx1); + const auto bx1_020 = EB(indx + i - 1, j + 1, em::bx1); + const auto bx1_120 = EB(indx + i, j + 1, em::bx1); + const auto bx1_220 = EB(indx + i + 1, j + 1, em::bx1); // clang-format on const auto bx1_0 = bx1_000 * w0px + bx1_100 * w1px + bx1_200 * w2px; From db8ad4ed7ea63bd0f1ebd11fb814cbaaa4eec95c Mon Sep 17 00:00:00 2001 From: gorbunove Date: Wed, 2 Jul 2025 18:19:28 -0500 Subject: [PATCH 042/154] IC RR force added --- src/engines/srpic.hpp | 22 ++++++++++++---- src/framework/parameters.cpp | 15 +++++++++++ src/global/defaults.h | 5 +++- src/global/enums.h | 9 
++++--- src/kernels/particle_pusher_sr.hpp | 41 +++++++++++++++++++++++++----- 5 files changed, 76 insertions(+), 16 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 6b6a52039..b066f54f7 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -318,6 +318,7 @@ namespace ntt { : ZERO; // cooling const auto has_synchrotron = (cooling == Cooling::SYNCHROTRON); + const auto has_compton = (cooling == Cooling::COMPTON); const auto sync_grad = has_synchrotron ? m_params.template get( "algorithms.synchrotron.gamma_rad") @@ -328,7 +329,15 @@ namespace ntt { "scales.omegaB0") / (SQR(sync_grad) * species.mass()) : ZERO; - + const auto comp_grad = has_compton + ? m_params.template get( + "algorithms.compton.gamma_rad") + : ZERO; + const auto comp_coeff = has_compton + ? (real_t)(0.1) * dt * + m_params.template get( + "scales.omegaB0") / (SQR(comp_grad) * species.mass()) + : ZERO; // toggle to indicate whether pgen defines the external force bool has_extforce = false; if constexpr (traits::has_member::value) { @@ -346,6 +355,9 @@ namespace ntt { if (cooling == Cooling::SYNCHROTRON) { cooling_tags = kernel::sr::Cooling::Synchrotron; } + if (cooling == Cooling::COMPTON) { + cooling_tags = kernel::sr::Cooling::Compton; + } // clang-format off if (not has_atmosphere and not has_extforce) { Kokkos::parallel_for( @@ -368,7 +380,7 @@ namespace ntt { domain.mesh.n_active(in::x2), domain.mesh.n_active(in::x3), domain.mesh.prtl_bc(), - gca_larmor_max, gca_eovrb_max, sync_coeff + gca_larmor_max, gca_eovrb_max, sync_coeff, comp_coeff )); } else if (has_atmosphere and not has_extforce) { const auto force = @@ -398,7 +410,7 @@ namespace ntt { domain.mesh.n_active(in::x2), domain.mesh.n_active(in::x3), domain.mesh.prtl_bc(), - gca_larmor_max, gca_eovrb_max, sync_coeff + gca_larmor_max, gca_eovrb_max, sync_coeff, comp_coeff )); } else if (not has_atmosphere and has_extforce) { if constexpr (traits::has_member::value) { @@ -427,7 +439,7 @@ namespace ntt { 
domain.mesh.n_active(in::x2), domain.mesh.n_active(in::x3), domain.mesh.prtl_bc(), - gca_larmor_max, gca_eovrb_max, sync_coeff + gca_larmor_max, gca_eovrb_max, sync_coeff, comp_coeff )); } else { raise::Error("External force not implemented", HERE); @@ -459,7 +471,7 @@ namespace ntt { domain.mesh.n_active(in::x2), domain.mesh.n_active(in::x3), domain.mesh.prtl_bc(), - gca_larmor_max, gca_eovrb_max, sync_coeff + gca_larmor_max, gca_eovrb_max, sync_coeff, comp_coeff )); } else { raise::Error("External force not implemented", HERE); diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index 079ad615e..75244ed5b 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -235,6 +235,13 @@ namespace ntt { promiseToDefine("algorithms.synchrotron.gamma_rad"); } + if (cooling_enum == Cooling::COMPTON) { + raise::ErrorIf(engine_enum != SimEngine::SRPIC, + "Inverse Compton cooling is only supported for SRPIC", + HERE); + promiseToDefine("algorithms.compton.gamma_rad"); + } + species.emplace_back(ParticleSpecies(idx, label, mass, @@ -916,6 +923,14 @@ namespace ntt { "gamma_rad", defaults::synchrotron::gamma_rad)); } + if (isPromised("algorithms.compton.gamma_rad")) { + set("algorithms.compton.gamma_rad", + toml::find_or(toml_data, + "algorithms", + "compton", + "gamma_rad", + defaults::compton::gamma_rad)); + } // @TODO: disabling stats for non-Cartesian if (coord_enum != Coord::Cart) { diff --git a/src/global/defaults.h b/src/global/defaults.h index 9513493b1..7c16d80f3 100644 --- a/src/global/defaults.h +++ b/src/global/defaults.h @@ -85,7 +85,10 @@ namespace ntt::defaults { namespace synchrotron { const real_t gamma_rad = 1.0; } // namespace synchrotron - + + namespace compton{ + const real_t gamma_rad = 1.0; + } } // namespace ntt::defaults #endif // GLOBAL_DEFAULTS_H diff --git a/src/global/enums.h b/src/global/enums.h index 08130a2c8..80804b55a 100644 --- a/src/global/enums.h +++ b/src/global/enums.h @@ -11,7 +11,7 @@ * - enum 
ntt::FldsBC // periodic, match, fixed, atmosphere, * custom, horizon, axis, conductor, sync * - enum ntt::PrtlPusher // boris, vay, photon, none - * - enum ntt::Cooling // synchrotron, none + * - enum ntt::Cooling // compton, synchrotron, none * - enum ntt::FldsID // e, dive, d, divd, b, h, j, * a, t, rho, charge, n, nppc, v, custom * - enum ntt::StatsID // b^2, e^2, exb, j.e, t, rho, @@ -265,13 +265,14 @@ namespace ntt { enum type : uint8_t { INVALID = 0, SYNCHROTRON = 1, - NONE = 2, + COMPTON = 2, + NONE = 3, }; constexpr Cooling(uint8_t c) : enums_hidden::BaseEnum { c } {} - static constexpr type variants[] = { SYNCHROTRON, NONE }; - static constexpr const char* lookup[] = { "synchrotron", "none" }; + static constexpr type variants[] = { SYNCHROTRON, COMPTON, NONE }; + static constexpr const char* lookup[] = { "synchrotron", "compton", "none" }; static constexpr std::size_t total = sizeof(variants) / sizeof(variants[0]); }; diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 6bd4e1714..6f437ff0f 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -51,6 +51,7 @@ namespace kernel::sr { enum CoolingTags_ { None = 0, Synchrotron = 1 << 0, + Compton = 1 << 1, }; } // namespace Cooling @@ -225,8 +226,8 @@ namespace kernel::sr { bool is_axis_i2min { false }, is_axis_i2max { false }; // gca parameters const real_t gca_larmor, gca_EovrB_sqr; - // synchrotron cooling parameters - const real_t coeff_sync; + // radiative cooling parameters + const real_t coeff_sync, coeff_comp; public: Pusher_kernel(const PrtlPusher::type& pusher, @@ -263,7 +264,8 @@ namespace kernel::sr { const boundaries_t& boundaries, real_t gca_larmor_max, real_t gca_eovrb_max, - real_t coeff_sync) + real_t coeff_sync, + real_t coeff_comp) : pusher { pusher } , GCA { GCA } , ext_force { ext_force } @@ -297,7 +299,8 @@ namespace kernel::sr { , ni3 { ni3 } , gca_larmor { gca_larmor_max } , gca_EovrB_sqr { SQR(gca_eovrb_max) } - 
, coeff_sync { coeff_sync } { + , coeff_sync { coeff_sync } + , coeff_comp { coeff_comp } { raise::ErrorIf(boundaries.size() < 1, "boundaries defined incorrectly", HERE); is_absorb_i1min = (boundaries[0].first == PrtlBC::ATMOSPHERE) || (boundaries[0].first == PrtlBC::ABSORB); @@ -366,7 +369,8 @@ namespace kernel::sr { const boundaries_t& boundaries, real_t gca_larmor_max, real_t gca_eovrb_max, - real_t coeff_sync) + real_t coeff_sync, + real_t coeff_comp) : Pusher_kernel(pusher, GCA, ext_force, @@ -401,7 +405,8 @@ namespace kernel::sr { boundaries, gca_larmor_max, gca_eovrb_max, - coeff_sync) {} + coeff_sync, + coeff_comp) {} Inline void synchrotronDrag(index_t& p, vec_t& u_prime, @@ -454,6 +459,22 @@ namespace kernel::sr { ux3(p) += coeff_sync * (kappaR[2] - gamma_prime_sqr * u_prime[2] * chiR_sqr); } + Inline void inverseComptonDrag(index_t& p, + vec_t& u_prime + ) const { + real_t gamma_prime_sqr = ONE / math::sqrt(ONE + NORM_SQR(u_prime[0], + u_prime[1], + u_prime[2])); + u_prime[0] *= gamma_prime_sqr; + u_prime[1] *= gamma_prime_sqr; + u_prime[2] *= gamma_prime_sqr; + gamma_prime_sqr = SQR(ONE / gamma_prime_sqr); + + ux1(p) -= coeff_comp * gamma_prime_sqr * u_prime[0]; + ux2(p) -= coeff_comp * gamma_prime_sqr * u_prime[1]; + ux3(p) -= coeff_comp * gamma_prime_sqr * u_prime[2]; + } + Inline void operator()(index_t p) const { if (tag(p) != ParticleTag::alive) { if (tag(p) != ParticleTag::dead) { @@ -558,6 +579,14 @@ namespace kernel::sr { synchrotronDrag(p, u_prime, ei_Cart_rad, bi_Cart_rad); } } + if (cooling & Cooling::Compton) { + if (!is_gca) { + u_prime[0] = HALF * (u_prime[0] + ux1(p)); + u_prime[1] = HALF * (u_prime[1] + ux2(p)); + u_prime[2] = HALF * (u_prime[2] + ux3(p)); + inverseComptonDrag(p, u_prime); + } + } // update position posUpd(true, p, xp_Cd); } From 77cdd68b6fa4c5c49f38777362d749cf7fa77ea5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Mon, 14 Jul 2025 14:42:03 -0500 Subject: [PATCH 043/154] fix 2nd order shape function 
in pusher --- src/kernels/particle_pusher_sr.hpp | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 2d7569319..e379e96f2 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -475,7 +475,7 @@ namespace kernel::sr { vec_t ei_Cart_rad { ZERO }, bi_Cart_rad { ZERO }; bool is_gca { false }; - // getInterpFlds(p, ei, bi); + //getInterpFlds(p, ei, bi); // ToDo: Better way to call this getInterpFlds2nd(p, ei, bi); @@ -1161,14 +1161,6 @@ namespace kernel::sr { const auto dx1_ { static_cast(dx1(p)) }; const auto dx2_ { static_cast(dx2(p)) }; - const int dx1_less_half = static_cast(dx1_ < - static_cast(0.5)); - const auto dx1_center = static_cast(dx1_less_half) - dx1_; - - const int dx2_less_half = static_cast(dx2_ < - static_cast(0.5)); - const auto dx2_center = static_cast(dx2_less_half) - dx2_; - // direct interpolation of staggered grid // primal = i+ind, dual = i const int indx = static_cast(static_cast(dx1_ + HALF)); @@ -1176,12 +1168,14 @@ namespace kernel::sr { // Compute weights for second-order interpolation // primal - const auto w0px = HALF * SQR(HALF + dx1_center); - const auto w1px = static_cast(0.75) - SQR(dx1_center); - const auto w2px = HALF * SQR(HALF - dx1_center); - const auto w0py = HALF * SQR(HALF + dx2_center); - const auto w1py = static_cast(0.75) - SQR(dx2_center); - const auto w2py = HALF * SQR(HALF - dx2_center); + const auto w0px = HALF * SQR(HALF - dx1_ + static_cast(indx)); + const auto w1px = static_cast(0.75) - + SQR(dx1_ - static_cast(indx)); + const auto w2px = ONE - w0px - w1px; + const auto w0py = HALF * SQR(HALF - dx2_ + static_cast(indy)); + const auto w1py = static_cast(0.75) - + SQR(dx2_ - static_cast(indy)); + const auto w2py = ONE - w0py - w1py; // dual const auto w0dx = HALF * SQR(ONE - dx1_); From 8c60b0179d07a092fb0eb4be9365d9b2146e4dc9 Mon Sep 17 00:00:00 2001 From: 
LudwigBoess Date: Thu, 17 Jul 2025 09:40:33 -0500 Subject: [PATCH 044/154] fix second order weight functions in 1D and 3D --- src/kernels/particle_pusher_sr.hpp | 44 +++++++++++------------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index e379e96f2..826da86e5 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -1100,19 +1100,16 @@ namespace kernel::sr { const int i { i1(p) + static_cast(N_GHOSTS) }; const auto dx1_ { static_cast(dx1(p)) }; - const int dx1_less_half = static_cast(dx1_ < - static_cast(0.5)); - const auto dx1_center = static_cast(dx1_less_half) - dx1_; - // direct interpolation of staggered grid // primal = i+ind, dual = i const int indx = static_cast(static_cast(dx1_ + HALF)); // Compute weights for second-order interpolation // primal - const auto w0p = HALF * SQR(HALF + dx1_center); - const auto w1p = static_cast(0.75) - SQR(dx1_center); - const auto w2p = HALF * SQR(HALF - dx1_center); + const auto w0p = HALF * SQR(HALF - dx1_ + static_cast(indx)); + const auto w1p = static_cast(0.75) - + SQR(dx1_ - static_cast(indx)); + const auto w2p = ONE - w0p - w1p; // dual const auto w0d = HALF * SQR(ONE - dx1_); @@ -1307,18 +1304,6 @@ namespace kernel::sr { const auto dx2_ { static_cast(dx2(p)) }; const auto dx3_ { static_cast(dx3(p)) }; - const int dx1_less_half = static_cast(dx1_ < - static_cast(0.5)); - const auto dx1_center = static_cast(dx1_less_half) - dx1_; - - const int dx2_less_half = static_cast(dx2_ < - static_cast(0.5)); - const auto dx2_center = static_cast(dx2_less_half) - dx2_; - - const int dx3_less_half = static_cast(dx3_ < - static_cast(0.5)); - const auto dx3_center = static_cast(dx3_less_half) - dx3_; - // direct interpolation of staggered grid // primal = i+ind, dual = i const int indx = static_cast(static_cast(dx1_ + HALF)); @@ -1327,15 +1312,18 @@ namespace kernel::sr { // Compute weights for 
second-order interpolation // primal - const auto w0px = HALF * SQR(HALF + dx1_center); - const auto w1px = static_cast(0.75) - SQR(dx1_center); - const auto w2px = HALF * SQR(HALF - dx1_center); - const auto w0py = HALF * SQR(HALF + dx2_center); - const auto w1py = static_cast(0.75) - SQR(dx2_center); - const auto w2py = HALF * SQR(HALF - dx2_center); - const auto w0pz = HALF * SQR(HALF + dx3_center); - const auto w1pz = static_cast(0.75) - SQR(dx3_center); - const auto w2pz = HALF * SQR(HALF - dx3_center); + const auto w0px = HALF * SQR(HALF - dx1_ + static_cast(indx)); + const auto w1px = static_cast(0.75) - + SQR(dx1_ - static_cast(indx)); + const auto w2px = ONE - w0px - w1px; + const auto w0py = HALF * SQR(HALF - dx2_ + static_cast(indy)); + const auto w1py = static_cast(0.75) - + SQR(dx2_ - static_cast(indy)); + const auto w2py = ONE - w0py - w1py; + const auto w0pz = HALF * SQR(HALF - dx3_ + static_cast(indz)); + const auto w1pz = static_cast(0.75) - + SQR(dx3_ - static_cast(indz)); + const auto w2pz = ONE - w0pz - w1pz; // dual const auto w0dx = HALF * SQR(ONE - dx1_); From 2732659a33d72c2a453d1c15410b74c2f5df04c3 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 17 Jul 2025 09:50:45 -0500 Subject: [PATCH 045/154] introduced `THREE_FOURTHS` to shorten expression in pusher and deposit for second order shape function --- src/global/utils/numeric.h | 2 ++ src/kernels/currents_deposit.hpp | 12 ++++++------ src/kernels/particle_pusher_sr.hpp | 18 ++++++------------ 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/src/global/utils/numeric.h b/src/global/utils/numeric.h index 9ff262ed8..fd1ddc657 100644 --- a/src/global/utils/numeric.h +++ b/src/global/utils/numeric.h @@ -40,6 +40,7 @@ inline constexpr float TWELVE = 12.0f; inline constexpr float ZERO = 0.0f; inline constexpr float HALF = 0.5f; inline constexpr float THIRD = 0.333333f; +inline constexpr float THREE_FOURTHS = 0.75f; inline constexpr float INV_2 = 0.5f; inline constexpr float 
INV_4 = 0.25f; inline constexpr float INV_8 = 0.125f; @@ -56,6 +57,7 @@ inline constexpr double TWELVE = 12.0; inline constexpr double ZERO = 0.0; inline constexpr double HALF = 0.5; inline constexpr double THIRD = 0.3333333333333333; +inline constexpr double THREE_FOURTHS = 0.75; inline constexpr double INV_2 = 0.5; inline constexpr double INV_4 = 0.25; inline constexpr double INV_8 = 0.125; diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 0b7409f4a..1f508a9a0 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -102,13 +102,13 @@ namespace kernel { update_i2 = true; S0_0 = HALF * SQR(HALF + di_center_prev); - S0_1 = static_cast(0.75) - SQR(di_center_prev); + S0_1 = THREE_FOURTHS - SQR(di_center_prev); S0_2 = HALF * SQR(HALF - di_center_prev); S0_3 = ZERO; S1_0 = ZERO; S1_1 = HALF * SQR(HALF + di_center); - S1_2 = static_cast(0.75) - SQR(di_center); + S1_2 = THREE_FOURTHS - SQR(di_center); S1_3 = HALF * SQR(HALF - di_center); } else if (shift_i == -1) { /* @@ -121,11 +121,11 @@ namespace kernel { S0_0 = ZERO; S0_1 = HALF * SQR(HALF + di_center_prev); - S0_2 = static_cast(0.75) - SQR(di_center_prev); + S0_2 = THREE_FOURTHS - SQR(di_center_prev); S0_3 = HALF * SQR(HALF - di_center_prev); S1_0 = HALF * SQR(HALF + di_center); - S1_1 = static_cast(0.75) - SQR(di_center); + S1_1 = THREE_FOURTHS - SQR(di_center); S1_2 = HALF * SQR(HALF - di_center); S1_3 = ZERO; @@ -139,12 +139,12 @@ namespace kernel { update_i2 = false; S0_0 = HALF * SQR(HALF + di_center_prev); - S0_1 = static_cast(0.75) - SQR(di_center_prev); + S0_1 = THREE_FOURTHS - SQR(di_center_prev); S0_2 = HALF * SQR(HALF - di_center_prev); S0_3 = ZERO; S1_0 = HALF * SQR(HALF + di_center); - S1_1 = static_cast(0.75) - SQR(di_center); + S1_1 = THREE_FOURTHS - SQR(di_center); S1_2 = HALF * SQR(HALF - di_center); S1_3 = ZERO; } else { diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 826da86e5..980acca55 
100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -1107,8 +1107,7 @@ namespace kernel::sr { // Compute weights for second-order interpolation // primal const auto w0p = HALF * SQR(HALF - dx1_ + static_cast(indx)); - const auto w1p = static_cast(0.75) - - SQR(dx1_ - static_cast(indx)); + const auto w1p = THREE_FOURTHS - SQR(dx1_ - static_cast(indx)); const auto w2p = ONE - w0p - w1p; // dual @@ -1166,12 +1165,10 @@ namespace kernel::sr { // Compute weights for second-order interpolation // primal const auto w0px = HALF * SQR(HALF - dx1_ + static_cast(indx)); - const auto w1px = static_cast(0.75) - - SQR(dx1_ - static_cast(indx)); + const auto w1px = THREE_FOURTHS - SQR(dx1_ - static_cast(indx)); const auto w2px = ONE - w0px - w1px; const auto w0py = HALF * SQR(HALF - dx2_ + static_cast(indy)); - const auto w1py = static_cast(0.75) - - SQR(dx2_ - static_cast(indy)); + const auto w1py = THREE_FOURTHS - SQR(dx2_ - static_cast(indy)); const auto w2py = ONE - w0py - w1py; // dual @@ -1313,16 +1310,13 @@ namespace kernel::sr { // Compute weights for second-order interpolation // primal const auto w0px = HALF * SQR(HALF - dx1_ + static_cast(indx)); - const auto w1px = static_cast(0.75) - - SQR(dx1_ - static_cast(indx)); + const auto w1px = THREE_FOURTHS - SQR(dx1_ - static_cast(indx)); const auto w2px = ONE - w0px - w1px; const auto w0py = HALF * SQR(HALF - dx2_ + static_cast(indy)); - const auto w1py = static_cast(0.75) - - SQR(dx2_ - static_cast(indy)); + const auto w1py = THREE_FOURTHS - SQR(dx2_ - static_cast(indy)); const auto w2py = ONE - w0py - w1py; const auto w0pz = HALF * SQR(HALF - dx3_ + static_cast(indz)); - const auto w1pz = static_cast(0.75) - - SQR(dx3_ - static_cast(indz)); + const auto w1pz = THREE_FOURTHS - SQR(dx3_ - static_cast(indz)); const auto w2pz = ONE - w0pz - w1pz; // dual From 6e72e046fd95750a57f077177bfcb0bbd150f1ef Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 17 Jul 2025 17:51:54 -0400 Subject: 
[PATCH 046/154] version --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 06a7690d7..4cff5b41b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ set(PROJECT_NAME entity) project( ${PROJECT_NAME} - VERSION 1.2.0 + VERSION 1.3.0 LANGUAGES CXX C) add_compile_options("-D ENTITY_VERSION=\"${PROJECT_VERSION}\"") set(hash_cmd "git diff --quiet src/ && echo $(git rev-parse HEAD) ") From 04e456641fe3d194d0543bc8253a99810ec987ab Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Sat, 19 Jul 2025 18:03:41 -0500 Subject: [PATCH 047/154] switch off clang-format in B update - looks cleaner --- src/kernels/faraday_mink.hpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/kernels/faraday_mink.hpp b/src/kernels/faraday_mink.hpp index 1112e56e7..90ea7e34f 100644 --- a/src/kernels/faraday_mink.hpp +++ b/src/kernels/faraday_mink.hpp @@ -63,13 +63,15 @@ namespace kernel::mink { Inline void operator()(index_t i1) const { if constexpr (D == Dim::_1D) { - const auto alphax = ONE - THREE * deltax; + const auto alphax = ONE - THREE * deltax; + // clang-format off EB(i1, em::bx2) += coeff1 * ( - + alphax * (EB(i1 + 1, em::ex3) - EB(i1 , em::ex3)) - + deltax * (EB(i1 + 2, em::ex3) - EB(i1 - 1, em::ex3))); + + alphax * (EB(i1 + 1, em::ex3) - EB(i1 , em::ex3)) + + deltax * (EB(i1 + 2, em::ex3) - EB(i1 - 1, em::ex3))); EB(i1, em::bx3) += coeff1 * ( - - alphax * (EB(i1 + 1, em::ex2) - EB(i1 , em::ex2)) - - deltax * (EB(i1 + 2, em::ex2) - EB(i1 - 1, em::ex2))); + - alphax * (EB(i1 + 1, em::ex2) - EB(i1 , em::ex2)) + - deltax * (EB(i1 + 2, em::ex2) - EB(i1 - 1, em::ex2))); + // clang-format on } else { raise::KernelError(HERE, "Faraday_kernel: 1D implementation called for D != 1"); } @@ -79,6 +81,7 @@ namespace kernel::mink { if constexpr (D == Dim::_2D) { const auto alphax = ONE - TWO * betaxy - THREE * deltax; const auto alphay = ONE - TWO * betayx - THREE * deltay; + // 
clang-format off EB(i1, i2, em::bx1) += coeff1 * ( - alphay * (EB(i1 , i2 + 1, em::ex3) - EB(i1 , i2 , em::ex3)) - deltay * (EB(i1 , i2 + 2, em::ex3) - EB(i1 , i2 - 1, em::ex3)) @@ -98,6 +101,7 @@ namespace kernel::mink { - deltax * (EB(i1 + 2, i2 , em::ex2) - EB(i1 - 1, i2 , em::ex2)) - betaxy * (EB(i1 + 1, i2 + 1, em::ex2) - EB(i1 , i2 + 1, em::ex2)) - betaxy * (EB(i1 + 1, i2 - 1, em::ex2) - EB(i1 , i2 - 1, em::ex2))); + // clang-format on } else { raise::KernelError(HERE, "Faraday_kernel: 2D implementation called for D != 2"); @@ -109,6 +113,7 @@ namespace kernel::mink { const auto alphax = ONE - TWO * betaxy - TWO * betaxz - THREE * deltax; const auto alphay = ONE - TWO * betayx - TWO * betayz - THREE * deltay; const auto alphaz = ONE - TWO * betazx - TWO * betazy - THREE * deltaz; + // clang-format off EB(i1, i2, i3, em::bx1) += coeff1 * ( + alphaz * (EB(i1 , i2 , i3 + 1, em::ex2) - EB(i1 , i2 , i3 , em::ex2)) + deltaz * (EB(i1 , i2 , i3 + 2, em::ex2) - EB(i1 , i2 , i3 - 1, em::ex2)) @@ -148,6 +153,7 @@ namespace kernel::mink { - betaxy * (EB(i1 + 1, i2 - 1, i3 , em::ex2) - EB(i1 , i2 - 1, i3 , em::ex2)) - betaxz * (EB(i1 + 1, i2 , i3 + 1, em::ex2) - EB(i1 , i2 , i3 + 1, em::ex2)) - betaxz * (EB(i1 + 1, i2 , i3 - 1, em::ex2) - EB(i1 , i2 , i3 - 1, em::ex2))); + // clang-format on } else { raise::KernelError(HERE, "Faraday_kernel: 3D implementation called for D != 3"); } From 82ddfad0328e4d7772b2ecfdd7a08a110f6e86ad Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Mon, 28 Jul 2025 18:36:41 -0500 Subject: [PATCH 048/154] first step for esirkepov with arbitrary order (wip) --- src/kernels/currents_deposit.hpp | 660 +++++++++++++++++++++++++++++-- 1 file changed, 633 insertions(+), 27 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 1f508a9a0..40ca405f4 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -292,6 +292,395 @@ namespace kernel { i_min += N_GHOSTS; } + Inline void 
W(real_t* _S, real_t x) const { + + if constexpr (O == 2) { + + _S[0] = HALF * SQR(HALF - x); + _S[1] = THREE_FOURTHS - SQR(x); + _S[2] = HALF * SQR(HALF + x); + + } else if constexpr (O == 3) { + + const auto x2 = x * x; + const auto x3 = x2 * x; + + _S[0] = static_cast(1 / 6) * (ONE - x3) - HALF * SQR(x - x2); + _S[1] = static_cast(2 / 3) - x2 + HALF * x3; + _S[2] = static_cast(1 / 6) + HALF * (x + x2 + x3); + _S[3] = static_cast(1 / 6) * x3; + + } else if constexpr (O == 4) { + + const auto x2 = x * x; + const auto x3 = x2 * x; + const auto x4 = x2 * x2; + + _S[0] = static_cast(1 / 384) - static_cast(1 / 48) * x + + static_cast(1 / 16) * x2 - + static_cast(1 / 12) * x3 + + static_cast(1 / 24) * x4; + _S[1] = static_cast(19 / 96) - static_cast(11 / 24) * x + + static_cast(1 / 4) * x2 + + static_cast(1 / 6) * x3 - static_cast(1 / 6) * x4; + _S[2] = static_cast(115 / 192) - static_cast(5 / 8) * x2 + + static_cast(1 / 4) * x4; + _S[3] = static_cast(19 / 96) + static_cast(11 / 24) * x + + static_cast(1 / 4) * x2 - + static_cast(1 / 6) * x3 - static_cast(1 / 6) * x4; + _S[4] = static_cast(1 / 384) + static_cast(1 / 48) * x + + static_cast(1 / 16) * x2 + + static_cast(1 / 12) * x3 + + static_cast(1 / 24) * x4; + + } else if constexpr (O == 5) { + + const auto x2 = x * x; + const auto x3 = x2 * x; + const auto x4 = x2 * x2; + const auto x5 = x3 * x2; + const auto x6 = x3 * x3; + + _S[0] = static_cast(1.0 / 46080.0) - + static_cast(1.0 / 3840.0) * x + + static_cast(1.0 / 384.0) * x2 - + static_cast(1.0 / 96.0) * x3 + + static_cast(1.0 / 72.0) * x4 - + static_cast(1.0 / 144.0) * x5 + + static_cast(1.0 / 720.0) * x6; + + _S[1] = static_cast(13.0 / 9216.0) - + static_cast(11.0 / 768.0) * x + + static_cast(1.0 / 48.0) * x2 + + static_cast(5.0 / 72.0) * x3 - + static_cast(1.0 / 8.0) * x4 + + static_cast(5.0 / 144.0) * x5 - + static_cast(1.0 / 144.0) * x6; + + _S[2] = static_cast(115.0 / 768.0) - + static_cast(5.0 / 24.0) * x2 + + static_cast(1.0 / 8.0) * x4 - + 
static_cast(1.0 / 72.0) * x6; + + _S[3] = static_cast(115.0 / 768.0) - + static_cast(5.0 / 24.0) * x2 + + static_cast(1.0 / 8.0) * x4 - + static_cast(1.0 / 72.0) * x6; + + _S[4] = static_cast(13.0 / 9216.0) + + static_cast(11.0 / 768.0) * x + + static_cast(1.0 / 48.0) * x2 - + static_cast(5.0 / 72.0) * x3 - + static_cast(1.0 / 8.0) * x4 - + static_cast(5.0 / 144.0) * x5 - + static_cast(1.0 / 144.0) * x6; + + _S[5] = static_cast(1.0 / 46080.0) + + static_cast(1.0 / 3840.0) * x + + static_cast(1.0 / 384.0) * x2 + + static_cast(1.0 / 96.0) * x3 + + static_cast(1.0 / 72.0) * x4 + + static_cast(1.0 / 144.0) * x5 + + static_cast(1.0 / 720.0) * x6; + + } else if constexpr (O == 6) { + + const auto x2 = x * x; + const auto x3 = x2 * x; + const auto x4 = x2 * x2; + const auto x5 = x3 * x2; + const auto x6 = x3 * x3; + + _S[0] = static_cast(1.0 / 40320.0) - + static_cast(1.0 / 4480.0) * x + + static_cast(1.0 / 640.0) * x2 - + static_cast(1.0 / 192.0) * x3 + + static_cast(1.0 / 144.0) * x4 - + static_cast(1.0 / 288.0) * x5 + + static_cast(1.0 / 1440.0) * x6; + + _S[1] = static_cast(1.0 / 1344.0) - + static_cast(1.0 / 160.0) * x + + static_cast(5.0 / 192.0) * x2 - + static_cast(1.0 / 48.0) * x3 - + static_cast(1.0 / 48.0) * x4 + + static_cast(5.0 / 288.0) * x5 - + static_cast(1.0 / 288.0) * x6; + + _S[2] = static_cast(17.0 / 336.0) - + static_cast(5.0 / 48.0) * x2 + + static_cast(1.0 / 12.0) * x4 - + static_cast(1.0 / 144.0) * x6; + + _S[3] = static_cast(151.0 / 252.0) - + static_cast(35.0 / 48.0) * x2 + + static_cast(5.0 / 12.0) * x4 - + static_cast(1.0 / 36.0) * x6; + + _S[4] = static_cast(17.0 / 336.0) - + static_cast(5.0 / 48.0) * x2 + + static_cast(1.0 / 12.0) * x4 - + static_cast(1.0 / 144.0) * x6; + + _S[5] = static_cast(1.0 / 1344.0) + + static_cast(1.0 / 160.0) * x + + static_cast(5.0 / 192.0) * x2 + + static_cast(1.0 / 48.0) * x3 - + static_cast(1.0 / 48.0) * x4 - + static_cast(5.0 / 288.0) * x5 - + static_cast(1.0 / 288.0) * x6; + + _S[6] = static_cast(1.0 / 
40320.0) + + static_cast(1.0 / 4480.0) * x + + static_cast(1.0 / 640.0) * x2 + + static_cast(1.0 / 192.0) * x3 + + static_cast(1.0 / 144.0) * x4 + + static_cast(1.0 / 288.0) * x5 + + static_cast(1.0 / 1440.0) * x6; + + } else if constexpr (O == 7) { + + const auto x2 = x * x; + const auto x3 = x2 * x; + const auto x4 = x2 * x2; + const auto x5 = x3 * x2; + const auto x6 = x3 * x3; + const auto x7 = x4 * x3; + + _S[0] = static_cast(1.0 / 645120.0) - + static_cast(1.0 / 64512.0) * x + + static_cast(1.0 / 9216.0) * x2 - + static_cast(1.0 / 3072.0) * x3 + + static_cast(1.0 / 2304.0) * x4 - + static_cast(1.0 / 4608.0) * x5 + + static_cast(1.0 / 23040.0) * x6 - + static_cast(1.0 / 161280.0) * x7; + + _S[1] = static_cast(1.0 / 9216.0) - + static_cast(5.0 / 4608.0) * x + + static_cast(35.0 / 9216.0) * x2 - + static_cast(7.0 / 768.0) * x3 - + static_cast(7.0 / 1152.0) * x4 + + static_cast(35.0 / 4608.0) * x5 - + static_cast(5.0 / 4608.0) * x6 + + static_cast(1.0 / 9216.0) * x7; + + _S[2] = static_cast(25.0 / 1536.0) - + static_cast(35.0 / 768.0) * x2 + + static_cast(7.0 / 192.0) * x4 - + static_cast(1.0 / 96.0) * x6; + + _S[3] = static_cast(245.0 / 384.0) - + static_cast(245.0 / 192.0) * x2 + + static_cast(49.0 / 48.0) * x4 - + static_cast(7.0 / 72.0) * x6; + + _S[4] = _S[3]; // symmetry + + _S[5] = _S[2]; // symmetry + + _S[6] = static_cast(1 / 9216) + static_cast(5 / 4608) * x + + static_cast(35 / 9216) * x2 + + static_cast(7 / 768) * x3 - + static_cast(7 / 1152) * x4 - + static_cast(35 / 4608) * x5 - + static_cast(5 / 4608) * x6 - + static_cast(1 / 9216) * x7; + + _S[7] = static_cast(1 / 645120) + + static_cast(1 / 64512) * x + + static_cast(1 / 9216) * x2 + + static_cast(1 / 3072) * x3 + + static_cast(1 / 2304) * x4 + + static_cast(1 / 4608) * x5 + + static_cast(1 / 23040) * x6 + + static_cast(1 / 161280) * x7; + + } else if constexpr (O == 8) { + + const auto x2 = x * x; + const auto x3 = x2 * x; + const auto x4 = x2 * x2; + const auto x5 = x3 * x2; + const auto x6 = 
x3 * x3; + const auto x7 = x4 * x3; + const auto x8 = x4 * x4; + + _S[0] = static_cast(1.0 / 10321920.0) - + static_cast(1.0 / 1146880.0) * x + + static_cast(1.0 / 161280.0) * x2 - + static_cast(1.0 / 53760.0) * x3 + + static_cast(1.0 / 43008.0) * x4 - + static_cast(1.0 / 96768.0) * x5 + + static_cast(1.0 / 645120.0) * x6 - + static_cast(1.0 / 1032192.0) * x7 + + static_cast(1.0 / 4134528.0) * x8; + + _S[1] = static_cast(1.0 / 129024.0) - + static_cast(1.0 / 14336.0) * x + + static_cast(17.0 / 43008.0) * x2 - + static_cast(17.0 / 21504.0) * x3 + + static_cast(17.0 / 21504.0) * x4 - + static_cast(17.0 / 43008.0) * x5 + + static_cast(1.0 / 14336.0) * x6 - + static_cast(1.0 / 129024.0) * x7 + + static_cast(1.0 / 1032192.0) * x8; + + _S[2] = static_cast(361.0 / 64512.0) - + static_cast(153.0 / 14336.0) * x2 + + static_cast(51.0 / 14336.0) * x4 - + static_cast(17.0 / 43008.0) * x6 + + static_cast(1.0 / 1032192.0) * x8; + + _S[3] = static_cast(3061.0 / 16128.0) - + static_cast(170.0 / 1792.0) * x2 + + static_cast(34.0 / 1536.0) * x4 - + static_cast(17.0 / 16128.0) * x6; + + _S[4] = static_cast(257135.0 / 32256.0) - + static_cast(1785.0 / 896.0) * x2 + + static_cast(255.0 / 256.0) * x4 - + static_cast(85.0 / 1152.0) * x6; + + _S[5] = _S[3]; // symmetry + + _S[6] = _S[2]; // symmetry + + _S[7] = static_cast(1 / 129024) + + static_cast(1 / 14336) * x + + static_cast(17 / 43008) * x2 + + static_cast(17 / 21504) * x3 + + static_cast(17 / 21504) * x4 + + static_cast(17 / 43008) * x5 + + static_cast(1 / 14336) * x6 + + static_cast(1 / 129024) * x7 + + static_cast(1 / 1032192) * x8; + + _S[8] = static_cast(1 / 10321920) + + static_cast(1 / 1146880) * x + + static_cast(1 / 161280) * x2 + + static_cast(1 / 53760) * x3 + + static_cast(1 / 43008) * x4 + + static_cast(1 / 96768) * x5 + + static_cast(1 / 645120) * x6 + + static_cast(1 / 1032192) * x7 + + static_cast(1 / 4134528) * x8; + + } else { + raise::KernelError(HERE, "Invalid order of shape function!"); + } + } + + Inline void 
shape_function_Nth(real_t* S0, + real_t* S1, + ncells_t& i_min, + const index_t& i, + const real_t& di, + const index_t& i_prev, + const real_t& di_prev) const { + /* + Shape function per particle is a O+1 element array. + We need to find which indices are contributing to the shape function + For this we first compute the indices of the particle position + + Let * be the particle position at the current timestep + Let x be the particle position at the previous timestep + + + (-1) 0 1 ... N N+1 + __________________________________________ + | | x* | x* | // | x* | | // shift_i = 0 + |______|______|______|______|______|______| + | | x | x* | // | x* | * | // shift_i = 1 + |______|______|______|______|______|______| + | * | x* | x* | // | x | | // shift_i = -1 + |______|______|______|______|______|______| + */ + + // find shift in indices + // ToDo: fix + const int di_less_half = static_cast(di < static_cast(0.5)); + const int di_prev_less_half = static_cast( + di_prev < static_cast(0.5)); + + const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); + + // find the minimum index of the shape function -> ToDo! + i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); + + // center index of the shape function -> ToDo! + const auto di_center_prev = static_cast(1 - di_prev_less_half) - + di_prev; + const auto di_center = static_cast(1 - di_less_half) - di; + // ToDo: end fix + + real_t _S0[O+1], _S1[O+1]; + // apply shape function + W(_S0, di_center_prev); + W(_S1, di_center); + + // find indices and define shape function + if (shift_i == 1) { + /* + (-1) 0 1 ... N N+1 + __________________________________________ + | | x | x* | // | x* | * | // shift_i = 1 + |______|______|______|______|______|______| + */ + + for (int j = 0; j < O; j++) { + S0[j] = _S0[j]; + } + S0[O + 1] = ZERO; + + S1[0] = ZERO; + for (int j = 0; j < O; j++) { + S1[j+1] = _S1[j]; + } + + } else if (shift_i == -1) { + /* + (-1) 0 1 ... 
N N+1 + __________________________________________ + | * | x* | x* | // | x | | // shift_i = -1 + |______|______|______|______|______|______| + */ + + S0[0] = ZERO; + for (int j = 0; j < O; j++) { + S0[j+1] = _S0[j]; + } + + for (int j = 0; j < O; j++) { + S1[j] = _S1[j]; + } + S1[O+1] = ZERO; + + } else if (shift_i == 0) { + /* + (-1) 0 1 ... N N+1 + __________________________________________ + | | x* | x* | // | x* | | // shift_i = 0 + |______|______|______|______|______|______| + */ + + for (int j = 0; j < O; j++) { + S0[j] = _S0[j]; + } + S0[O + 1] = ZERO; + + for (int j = 0; j < O; j++) { + S1[j] = _S1[j]; + } + S1[O + 1] = ZERO; + } else { + raise::KernelError(HERE, "Invalid shift in indices"); + } + + // account for ghost cells here to shorten J update expression + i_min += N_GHOSTS; + } + public: /** * @brief explicit constructor. @@ -932,27 +1321,6 @@ namespace kernel { i3_prev(p), dx3_prev(p)); // clang-format on - // Calculate weight function - // for (int i = 0; i < interp_order + 2; ++i) { - // for (int j = 0; j < interp_order + 2; ++j) { - // for (int k = 0; k < interp_order + 2; ++k) { - // // Esirkepov 2001, Eq. 31 - // Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * - // ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + - // HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); - // - // Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * - // (S0x[i] * S0z[k] + S1x[i] * S1z[k] + - // HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); - // - // Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * - // (S0x[i] * S0y[j] + S1x[i] * S1y[j] + - // HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); - // } - // } - // } - // - // Unrolled calculations for Wx, Wy, and Wz // clang-format off const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * @@ -2178,13 +2546,251 @@ namespace kernel { if (update_x3 && update_y3) { J_acc(ix_min + 4, iy_min + 4, cur::jx3) += QVz * Wz_4_4; } - } // dim - } else { // order - raise::KernelError(HERE, "Unsupported interpolation order"); + + } // dim -> ToDo: 3D! 
+ + } else if constexpr (O > 3u) { + + // shape function in dim1 -> always required + real_t S0x[O + 2], S1x[O + 2]; + // indices of the shape function + ncells_t ix_min; + + // ToDo: Call shape function + + if constexpr (D == Dim::_1D) { + // ToDo + } else if constexpr (D == Dim::_2D) { + + // shape function in dim2 + real_t S0y[O + 2], S1y[O + 2]; + // indices of the shape function + ncells_t iy_min; + + // ToDo: Call shape function + + // define weight tensors + real_t Wx[O + 1][O + 1]; + real_t Wy[O + 1][O + 1]; + real_t Wz[O + 1][O + 1]; + +// Calculate weight function +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { + // Esirkepov 2001, Eq. 38 + Wx[i][j] = (S1x[i] - S0x[i]) * (S0y[j] + HALF * (S1y[j] - S0y[j])); + + Wy[i][j] = (S1y[i] - S0y[i]) * (S0y[j] + HALF * (S1x[j] - S0x[j])); + + Wz[i][j] = S0x[i] * S0y[j] + HALF * (S1x[i] - S1x[i]) * S0y[j] + + HALF * S0x[i] * (S1y[j] - S0y[j]) + + THIRD * (S1x[i] - S0x[i]) * (S1y[j] - S0y[j]); + } + } + + // contribution within the shape function stencil + real_t jx[O + 2][O + 2], jy[O + 2][O + 2], jz[O + 2][O + 2]; + + // prefactors to j update + const real_t Qdxdt = coeff * inv_dt; + const real_t Qdydt = coeff * inv_dt; + const real_t QVz = coeff * inv_dt * vp[2]; + + // Calculate current contribution + + // jx +#pragma unroll + for (int j = 0; j < O + 2; ++j) { + jx[0][j] = -Qdxdt * Wx[0][j]; + } + +#pragma unroll + for (int i = 1; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { + jx[i][j] = jx[i - 1][j] - Qdxdt * Wx[i][j]; + } + } + + // jy +#pragma unroll + for (int i = 0; i < O + 2; ++i) { + jy[i][0] = -Qdydt * Wy[i][0]; + } + +#pragma unroll + for (int j = 1; j < O + 2; ++j) { +#pragma unroll + for (int i = 0; i < O + 2; ++i) { + jy[i][j] = jy[i][j - 1] - Qdydt * Wy[i][j]; + } + } + + // jz +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { + jz[i][j] = QVz * Wz[i][j]; + } + } + + 
/* + Current update + */ + auto J_acc = J.access(); + +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { + J_acc(ix_min + i, iy_min + j, cur::jx1) += jx[i][j]; + J_acc(ix_min + i, iy_min + j, cur::jx2) += jy[i][j]; + J_acc(ix_min + i, iy_min + j, cur::jx3) += jz[i][j]; + } + } + + } else if constexpr (D == Dim::_3D) { + // shape function in dim2 + real_t S0y[O + 2], S1y[O + 2]; + // indices of the shape function + ncells_t iy_min; + + // ToDo: Call shape function + + // shape function in dim3 + real_t S0z[O + 2], S1z[O + 2]; + // indices of the shape function + ncells_t iz_min; + + // ToDo: Call shape function + + // define weight tensors + real_t Wx[O + 1][O + 1][O + 1]; + real_t Wy[O + 1][O + 1][O + 1]; + real_t Wz[O + 1][O + 1][O + 1]; + +// Calculate weight function +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { +#pragma unroll + for (int k = 0; k < O + 2; ++k) { + // Esirkepov 2001, Eq. 
31 + Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * + ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + + HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); + + Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * + (S0x[i] * S0z[k] + S1x[i] * S1z[k] + + HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); + + Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * + (S0x[i] * S0y[j] + S1x[i] * S1y[j] + + HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + } + } + } + + // contribution within the shape function stencil + real_t jx[O + 2][O + 2][O + 2], jy[O + 2][O + 2][O + 2], + jz[O + 2][O + 2][O + 2]; + + // prefactors to j update + const real_t Qdxdt = coeff * inv_dt; + const real_t Qdydt = coeff * inv_dt; + const real_t Qdzdt = coeff * inv_dt; + + // Calculate current contribution + + // jx +#pragma unroll + for (int j = 0; j < O + 2; ++j) { +#pragma unroll + for (int k = 0; k < O + 2; ++k) { + jx[0][j][k] = -Qdxdt * Wx[0][j][k]; + } + } + +#pragma unroll + for (int i = 1; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { +#pragma unroll + for (int k = 0; j < O + 2; ++k) { + jx[i][j][k] = jx[i - 1][j][k] - Qdxdt * Wx[i][j][k]; + } + } + } + + // jy +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int k = 0; k < O + 2; ++k) { + jy[i][0][k] = -Qdydt * Wy[i][0][k]; + } + } + +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int j = 1; j < O + 2; ++j) { +#pragma unroll + for (int k = 0; k < O + 2; ++k) { + jy[i][j][k] = jy[i][j - 1][k] - Qdydt * Wy[i][j][k]; + } + } + } + + // jz +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { + jy[i][j][0] = -Qdydt * Wy[i][j][0]; + } + } + +#pragma unroll + for (int i = 0; i < O + 2; ++i) { +#pragma unroll + for (int j = 0; j < O + 2; ++j) { +#pragma unroll + for (int k = 1; k < O + 2; ++k) { + jz[i][j][k] = jz[i][j][k - 1] - Qdzdt * Wz[i][j][k]; + } + } + } + + /* + Current update + */ + auto J_acc = J.access(); + +#pragma unroll + for (int i = 0; i < O + 2; ++i) 
{ +#pragma unroll + for (int j = 0; j < O + 2; ++j) { +#pragma unroll + for (int k = 1; k < O + 2; ++k) { + J_acc(ix_min + i, iy_min + j, iz_min, cur::jx1) += jx[i][j][k]; + J_acc(ix_min + i, iy_min + j, iz_min, cur::jx2) += jy[i][j][k]; + J_acc(ix_min + i, iy_min + j, iz_min, cur::jx3) += jz[i][j][k]; + } + } + } + } + + } else { // order + raise::KernelError(HERE, "Unsupported interpolation order"); + } } - } - }; -} // namespace kernel + }; + } // namespace kernel #undef i_di_to_Xi From 90f5019c0bfcd18badac18f5647c1c0bab33e8b3 Mon Sep 17 00:00:00 2001 From: hayk Date: Sun, 3 Aug 2025 11:13:52 -0400 Subject: [PATCH 049/154] new esirkepov (WIP, not yet working properly) --- src/kernels/currents_deposit.hpp | 2562 ++++++++++++++-------------- src/kernels/particle_pusher_sr.hpp | 10 +- src/kernels/particle_shapes.hpp | 107 ++ 3 files changed, 1395 insertions(+), 1284 deletions(-) create mode 100644 src/kernels/particle_shapes.hpp diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 40ca405f4..1feb7ba4e 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -14,8 +14,11 @@ #include "global.h" #include "arch/kokkos_aliases.h" +#include "utils/error.h" #include "utils/numeric.h" +#include "particle_shapes.hpp" + #include #define i_di_to_Xi(I, DI) static_cast((I)) + static_cast((DI)) @@ -43,117 +46,117 @@ namespace kernel { const M metric; const real_t charge, inv_dt; - Inline void shape_function_2nd(real_t& S0_0, - real_t& S0_1, - real_t& S0_2, - real_t& S0_3, - real_t& S1_0, - real_t& S1_1, - real_t& S1_2, - real_t& S1_3, - ncells_t& i_min, - bool& update_i2, - const index_t& i, - const real_t& di, - const index_t& i_prev, - const real_t& di_prev) const { - /* - Shape function per particle is a 4 element array. 
- We need to find which indices are contributing to the shape function - For this we first compute the indices of the particle position - - Let * be the particle position at the current timestep - Let x be the particle position at the previous timestep - - - (-1) 0 1 2 3 - ___________________________________ - | | x* | x* | x* | | // shift_i = 0 - |______|______|______|______|______| - | | x | x* | x* | * | // shift_i = 1 - |______|______|______|______|______| - | * | x* | x* | x | | // shift_i = -1 - |______|______|______|______|______| - */ - - // find shift in indices - const int di_less_half = static_cast(di < static_cast(0.5)); - const int di_prev_less_half = static_cast( - di_prev < static_cast(0.5)); - - const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); - - // find the minimum index of the shape function - i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); - - // center index of the shape function - const auto di_center_prev = static_cast(1 - di_prev_less_half) - - di_prev; - const auto di_center = static_cast(1 - di_less_half) - di; - - // find indices and define shape function - if (shift_i == 1) { - /* - (-1) 0 1 2 3 - ___________________________________ - | | x | x* | x* | * | // shift_i = 1 - |______|______|______|______|______| - */ - update_i2 = true; - - S0_0 = HALF * SQR(HALF + di_center_prev); - S0_1 = THREE_FOURTHS - SQR(di_center_prev); - S0_2 = HALF * SQR(HALF - di_center_prev); - S0_3 = ZERO; - - S1_0 = ZERO; - S1_1 = HALF * SQR(HALF + di_center); - S1_2 = THREE_FOURTHS - SQR(di_center); - S1_3 = HALF * SQR(HALF - di_center); - } else if (shift_i == -1) { - /* - (-1) 0 1 2 3 - ___________________________________ - | * | x* | x* | x | | // shift_i = -1 - |______|______|______|______|______| - */ - update_i2 = true; - - S0_0 = ZERO; - S0_1 = HALF * SQR(HALF + di_center_prev); - S0_2 = THREE_FOURTHS - SQR(di_center_prev); - S0_3 = HALF * SQR(HALF - di_center_prev); - - S1_0 = HALF * SQR(HALF + di_center); - 
S1_1 = THREE_FOURTHS - SQR(di_center); - S1_2 = HALF * SQR(HALF - di_center); - S1_3 = ZERO; - - } else if (shift_i == 0) { - /* - (-1) 0 1 2 3 - ___________________________________ - | | x* | x* | x* | | // shift_i = 0 - |______|______|______|______|______| - */ - update_i2 = false; - - S0_0 = HALF * SQR(HALF + di_center_prev); - S0_1 = THREE_FOURTHS - SQR(di_center_prev); - S0_2 = HALF * SQR(HALF - di_center_prev); - S0_3 = ZERO; - - S1_0 = HALF * SQR(HALF + di_center); - S1_1 = THREE_FOURTHS - SQR(di_center); - S1_2 = HALF * SQR(HALF - di_center); - S1_3 = ZERO; - } else { - raise::KernelError(HERE, "Invalid shift in indices"); - } - - // account for ghost cells here to shorten J update expression - i_min += N_GHOSTS; - } + // Inline void shape_function_2nd(real_t& S0_0, + // real_t& S0_1, + // real_t& S0_2, + // real_t& S0_3, + // real_t& S1_0, + // real_t& S1_1, + // real_t& S1_2, + // real_t& S1_3, + // ncells_t& i_min, + // bool& update_i2, + // const index_t& i, + // const real_t& di, + // const index_t& i_prev, + // const real_t& di_prev) const { + // /* + // Shape function per particle is a 4 element array. 
+ // We need to find which indices are contributing to the shape function + // For this we first compute the indices of the particle position + // + // Let * be the particle position at the current timestep + // Let x be the particle position at the previous timestep + // + // + // (-1) 0 1 2 3 + // ___________________________________ + // | | x* | x* | x* | | // shift_i = 0 + // |______|______|______|______|______| + // | | x | x* | x* | * | // shift_i = 1 + // |______|______|______|______|______| + // | * | x* | x* | x | | // shift_i = -1 + // |______|______|______|______|______| + // */ + // + // // find shift in indices + // const int di_less_half = static_cast(di < static_cast(0.5)); + // const int di_prev_less_half = static_cast( + // di_prev < static_cast(0.5)); + // + // const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); + // + // // find the minimum index of the shape function + // i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); + // + // // center index of the shape function + // const auto di_center_prev = static_cast(1 - di_prev_less_half) - + // di_prev; + // const auto di_center = static_cast(1 - di_less_half) - di; + // + // // find indices and define shape function + // if (shift_i == 1) { + // /* + // (-1) 0 1 2 3 + // ___________________________________ + // | | x | x* | x* | * | // shift_i = 1 + // |______|______|______|______|______| + // */ + // update_i2 = true; + // + // S0_0 = HALF * SQR(HALF + di_center_prev); + // S0_1 = THREE_FOURTHS - SQR(di_center_prev); + // S0_2 = HALF * SQR(HALF - di_center_prev); + // S0_3 = ZERO; + // + // S1_0 = ZERO; + // S1_1 = HALF * SQR(HALF + di_center); + // S1_2 = THREE_FOURTHS - SQR(di_center); + // S1_3 = HALF * SQR(HALF - di_center); + // } else if (shift_i == -1) { + // /* + // (-1) 0 1 2 3 + // ___________________________________ + // | * | x* | x* | x | | // shift_i = -1 + // |______|______|______|______|______| + // */ + // update_i2 = true; + // + // S0_0 = 
ZERO; + // S0_1 = HALF * SQR(HALF + di_center_prev); + // S0_2 = THREE_FOURTHS - SQR(di_center_prev); + // S0_3 = HALF * SQR(HALF - di_center_prev); + // + // S1_0 = HALF * SQR(HALF + di_center); + // S1_1 = THREE_FOURTHS - SQR(di_center); + // S1_2 = HALF * SQR(HALF - di_center); + // S1_3 = ZERO; + // + // } else if (shift_i == 0) { + // /* + // (-1) 0 1 2 3 + // ___________________________________ + // | | x* | x* | x* | | // shift_i = 0 + // |______|______|______|______|______| + // */ + // update_i2 = false; + // + // S0_0 = HALF * SQR(HALF + di_center_prev); + // S0_1 = THREE_FOURTHS - SQR(di_center_prev); + // S0_2 = HALF * SQR(HALF - di_center_prev); + // S0_3 = ZERO; + // + // S1_0 = HALF * SQR(HALF + di_center); + // S1_1 = THREE_FOURTHS - SQR(di_center); + // S1_2 = HALF * SQR(HALF - di_center); + // S1_3 = ZERO; + // } else { + // raise::KernelError(HERE, "Invalid shift in indices"); + // } + // + // // account for ghost cells here to shorten J update expression + // i_min += N_GHOSTS; + // } Inline void shape_function_3rd(real_t& S0_0, real_t& S0_1, @@ -614,7 +617,7 @@ namespace kernel { const auto di_center = static_cast(1 - di_less_half) - di; // ToDo: end fix - real_t _S0[O+1], _S1[O+1]; + real_t _S0[O + 1], _S1[O + 1]; // apply shape function W(_S0, di_center_prev); W(_S1, di_center); @@ -635,7 +638,7 @@ namespace kernel { S1[0] = ZERO; for (int j = 0; j < O; j++) { - S1[j+1] = _S1[j]; + S1[j + 1] = _S1[j]; } } else if (shift_i == -1) { @@ -648,13 +651,13 @@ namespace kernel { S0[0] = ZERO; for (int j = 0; j < O; j++) { - S0[j+1] = _S0[j]; + S0[j + 1] = _S0[j]; } for (int j = 0; j < O; j++) { S1[j] = _S1[j]; } - S1[O+1] = ZERO; + S1[O + 1] = ZERO; } else if (shift_i == 0) { /* @@ -1026,1185 +1029,1182 @@ namespace kernel { } } else if constexpr (O == 2u) { /* - Higher order charge conserving current deposition based on - Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract + * Higher order charge conserving current 
deposition based on + * Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract + **/ - We need to define the follwowing variable: - - Shape functions in spatial directions for the particle position - before and after the current timestep. - S0_*, S1_* - - Density composition matrix - Wx_*, Wy_*, Wz_* - */ + // iS -> shape function for init position + // fS -> shape function for final position - /* - x - direction - */ + // shape function at staggered points (one coeff is always ZERO) + int i1_minH; + real_t iS_x1H_0, iS_x1H_1, iS_x1H_2, iS_x1H_3; + real_t fS_x1H_0, fS_x1H_1, fS_x1H_2, fS_x1H_3; + + // shape function at integer points (one coeff is always ZERO) + int i1_min; + real_t iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3; + real_t fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3; - // shape function at previous timestep - real_t S0x_0, S0x_1, S0x_2, S0x_3; - // shape function at current timestep - real_t S1x_0, S1x_1, S1x_2, S1x_3; - // indices of the shape function - ncells_t ix_min; - bool update_x2; - // find indices and define shape function // clang-format off - shape_function_2nd(S0x_0, S0x_1, S0x_2, S0x_3, - S1x_0, S1x_1, S1x_2, S1x_3, - ix_min, update_x2, - i1(p), dx1(p), - i1_prev(p), dx1_prev(p)); + prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), + i1(p), static_cast(dx1(p)), + i1_minH, + iS_x1H_0, iS_x1H_1, iS_x1H_2, iS_x1H_3, + fS_x1H_0, fS_x1H_1, fS_x1H_2, fS_x1H_3); + prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), + i1(p), static_cast(dx1(p)), + i1_min, + iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, + fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); // clang-format on if constexpr (D == Dim::_1D) { - // ToDo + raise::KernelNotImplementedError(HERE); } else if constexpr (D == Dim::_2D) { - /* - y - direction - */ + // shape function at staggered points (one coeff is always ZERO) + int i2_minH; + real_t iS_x2H_0, iS_x2H_1, iS_x2H_2, iS_x2H_3; + real_t fS_x2H_0, fS_x2H_1, fS_x2H_2, fS_x2H_3; + + // shape function at integer points 
(one coeff is always ZERO) + int i2_min; + real_t iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3; + real_t fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3; - // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3; - // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3; - // indices of the shape function - ncells_t iy_min; - bool update_y2; - // find indices and define shape function // clang-format off - shape_function_2nd(S0y_0, S0y_1, S0y_2, S0y_3, - S1y_0, S1y_1, S1y_2, S1y_3, - iy_min, update_y2, - i2(p), dx2(p), - i2_prev(p), dx2_prev(p)); + prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), + i2(p), static_cast(dx2(p)), + i2_minH, + iS_x2H_0, iS_x2H_1, iS_x2H_2, iS_x2H_3, + fS_x2H_0, fS_x2H_1, fS_x2H_2, fS_x2H_3); + prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), + i2(p), static_cast(dx2(p)), + i2_min, + iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, + fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); // clang-format on - - // Esirkepov 2001, Eq. 38 - /* - x - component - */ - // Calculate weight function - unrolled - const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); - const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); - const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); - const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); - - const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); - const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); - const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); - const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); - - const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); - const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); - const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); - const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); - - // Unrolled calculations for Wy - const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S1y_0 - S0y_0); - const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S1y_1 - S0y_1); 
- const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S1y_2 - S0y_2); - - const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S1y_0 - S0y_0); - const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S1y_1 - S0y_1); - const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S1y_2 - S0y_2); - - const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S1y_0 - S0y_0); - const auto Wy_2_1 = HALF * (S1x_2 + S0x_2) * (S1y_1 - S0y_1); - const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S1y_2 - S0y_2); - - const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S1y_0 - S0y_0); - const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S1y_1 - S0y_1); - const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S1y_2 - S0y_2); - - // Unrolled calculations for Wz - const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + - S0y_0 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + - S0y_1 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + - S0y_2 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + - S0y_3 * (HALF * S1x_0 + S0x_0)); - - const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + - S0y_0 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_1 = THIRD * (S1y_1 * (HALF * S0x_1 + S1x_1) + - S0y_1 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + - S0y_2 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + - S0y_3 * (HALF * S1x_1 + S0x_1)); - - const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + - S0y_0 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + - S0y_1 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + - S0y_2 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + - S0y_3 * (HALF * S1x_2 + S0x_2)); - - const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + - S0y_0 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_1 = 
THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + - S0y_1 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + - S0y_2 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + - S0y_3 * (HALF * S1x_3 + S0x_3)); - - const real_t Qdxdt = coeff * inv_dt; - const real_t Qdydt = coeff * inv_dt; - const real_t QVz = coeff * vp[2]; - - // Esirkepov - Eq. 39 - // x-component - const auto jx_0_0 = -Qdxdt * Wx_0_0; - const auto jx_1_0 = jx_0_0 - Qdxdt * Wx_1_0; - const auto jx_2_0 = jx_1_0 - Qdxdt * Wx_2_0; - - const auto jx_0_1 = -Qdxdt * Wx_0_1; - const auto jx_1_1 = jx_0_1 - Qdxdt * Wx_1_1; - const auto jx_2_1 = jx_1_1 - Qdxdt * Wx_2_1; - - const auto jx_0_2 = -Qdxdt * Wx_0_2; - const auto jx_1_2 = jx_0_2 - Qdxdt * Wx_1_2; - const auto jx_2_2 = jx_1_2 - Qdxdt * Wx_2_2; - - const auto jx_0_3 = -Qdxdt * Wx_0_3; - const auto jx_1_3 = jx_0_3 - Qdxdt * Wx_1_3; - const auto jx_2_3 = jx_1_3 - Qdxdt * Wx_2_3; + // x1-components + const auto Wx1_00 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_0 + iS_x2_0); + const auto Wx1_01 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_1 + iS_x2_1); + const auto Wx1_02 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_2 + iS_x2_2); + const auto Wx1_03 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_3 + iS_x2_3); + + const auto Wx1_10 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_0 + iS_x2_0); + const auto Wx1_11 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_1 + iS_x2_1); + const auto Wx1_12 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_2 + iS_x2_2); + const auto Wx1_13 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_3 + iS_x2_3); + + const auto Wx1_20 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_0 + iS_x2_0); + const auto Wx1_21 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_1 + iS_x2_1); + const auto Wx1_22 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_2 + iS_x2_2); + const auto Wx1_23 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_3 + iS_x2_3); + + const auto Wx1_30 = HALF * (fS_x1H_3 - iS_x1H_3) * (fS_x2_0 + iS_x2_0); + const auto Wx1_31 = HALF * (fS_x1H_3 - 
iS_x1H_3) * (fS_x2_1 + iS_x2_1); + const auto Wx1_32 = HALF * (fS_x1H_3 - iS_x1H_3) * (fS_x2_2 + iS_x2_2); + const auto Wx1_33 = HALF * (fS_x1H_3 - iS_x1H_3) * (fS_x2_3 + iS_x2_3); + + // x2-components + const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_0 - iS_x2H_0); + const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_1 - iS_x2H_1); + const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_2 - iS_x2H_2); + const auto Wx2_03 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_3 - iS_x2H_3); + + const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_0 - iS_x2H_0); + const auto Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_1 - iS_x2H_1); + const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_2 - iS_x2H_2); + const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_3 - iS_x2H_3); + + const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_0 - iS_x2H_0); + const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_1 - iS_x2H_1); + const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_2 - iS_x2H_2); + const auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_3 - iS_x2H_3); + + const auto Wx2_30 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_0 - iS_x2H_0); + const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_1 - iS_x2H_1); + const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_2 - iS_x2H_2); + const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_3 - iS_x2H_3); + + // x3-components + const auto Wx3_00 = THIRD * (fS_x2_0 * (HALF * iS_x1_0 + fS_x1_0) + + iS_x2_0 * (HALF * fS_x1_0 + iS_x1_0)); + const auto Wx3_01 = THIRD * (fS_x2_1 * (HALF * iS_x1_0 + fS_x1_0) + + iS_x2_1 * (HALF * fS_x1_0 + iS_x1_0)); + const auto Wx3_02 = THIRD * (fS_x2_2 * (HALF * iS_x1_0 + fS_x1_0) + + iS_x2_2 * (HALF * fS_x1_0 + iS_x1_0)); + const auto Wx3_03 = THIRD * (fS_x2_3 * (HALF * iS_x1_0 + fS_x1_0) + + iS_x2_3 * (HALF * fS_x1_0 + iS_x1_0)); + + const auto Wx3_10 = THIRD * (fS_x2_0 * (HALF * iS_x1_1 + fS_x1_1) + + iS_x2_0 * (HALF * fS_x1_1 + iS_x1_1)); + const auto Wx3_11 = THIRD * 
(fS_x2_1 * (HALF * iS_x1_1 + fS_x1_1) + + iS_x2_1 * (HALF * fS_x1_1 + iS_x1_1)); + const auto Wx3_12 = THIRD * (fS_x2_2 * (HALF * iS_x1_1 + fS_x1_1) + + iS_x2_2 * (HALF * fS_x1_1 + iS_x1_1)); + const auto Wx3_13 = THIRD * (fS_x2_3 * (HALF * iS_x1_1 + fS_x1_1) + + iS_x2_3 * (HALF * fS_x1_1 + iS_x1_1)); + + const auto Wx3_20 = THIRD * (fS_x2_0 * (HALF * iS_x1_2 + fS_x1_2) + + iS_x2_0 * (HALF * fS_x1_2 + iS_x1_2)); + const auto Wx3_21 = THIRD * (fS_x2_1 * (HALF * iS_x1_2 + fS_x1_2) + + iS_x2_1 * (HALF * fS_x1_2 + iS_x1_2)); + const auto Wx3_22 = THIRD * (fS_x2_2 * (HALF * iS_x1_2 + fS_x1_2) + + iS_x2_2 * (HALF * fS_x1_2 + iS_x1_2)); + const auto Wx3_23 = THIRD * (fS_x2_3 * (HALF * iS_x1_2 + fS_x1_2) + + iS_x2_3 * (HALF * fS_x1_2 + iS_x1_2)); + + const auto Wx3_30 = THIRD * (fS_x2_0 * (HALF * iS_x1_3 + fS_x1_3) + + iS_x2_0 * (HALF * fS_x1_3 + iS_x1_3)); + const auto Wx3_31 = THIRD * (fS_x2_1 * (HALF * iS_x1_3 + fS_x1_3) + + iS_x2_1 * (HALF * fS_x1_3 + iS_x1_3)); + const auto Wx3_32 = THIRD * (fS_x2_2 * (HALF * iS_x1_3 + fS_x1_3) + + iS_x2_2 * (HALF * fS_x1_3 + iS_x1_3)); + const auto Wx3_33 = THIRD * (fS_x2_3 * (HALF * iS_x1_3 + fS_x1_3) + + iS_x2_3 * (HALF * fS_x1_3 + iS_x1_3)); + + // x1-component + const auto jx1_00 = Wx1_00; + const auto jx1_10 = jx1_00 + Wx1_10; + const auto jx1_20 = jx1_10 + Wx1_20; + const auto jx1_30 = jx1_20 + Wx1_30; + + const auto jx1_01 = Wx1_01; + const auto jx1_11 = jx1_01 + Wx1_11; + const auto jx1_21 = jx1_11 + Wx1_21; + const auto jx1_31 = jx1_21 + Wx1_31; + + const auto jx1_02 = Wx1_02; + const auto jx1_12 = jx1_02 + Wx1_12; + const auto jx1_22 = jx1_12 + Wx1_22; + const auto jx1_32 = jx1_22 + Wx1_32; + + const auto jx1_03 = Wx1_03; + const auto jx1_13 = jx1_03 + Wx1_13; + const auto jx1_23 = jx1_13 + Wx1_23; + const auto jx1_33 = jx1_23 + Wx1_33; // y-component - const auto jy_0_0 = -Qdydt * Wy_0_0; - const auto jy_0_1 = jy_0_0 - Qdydt * Wy_0_1; - const auto jy_0_2 = jy_0_1 - Qdydt * Wy_0_2; - - const auto jy_1_0 = -Qdydt * Wy_1_0; - 
const auto jy_1_1 = jy_1_0 - Qdydt * Wy_1_1; - const auto jy_1_2 = jy_1_1 - Qdydt * Wy_1_2; - - const auto jy_2_0 = -Qdydt * Wy_2_0; - const auto jy_2_1 = jy_2_0 - Qdydt * Wy_2_1; - const auto jy_2_2 = jy_2_1 - Qdydt * Wy_2_2; + const auto jx2_00 = Wx2_00; + const auto jx2_01 = jx2_00 + Wx2_01; + const auto jx2_02 = jx2_01 + Wx2_02; + const auto jx2_03 = jx2_02 + Wx2_03; + + const auto jx2_10 = Wx2_10; + const auto jx2_11 = jx2_10 + Wx2_11; + const auto jx2_12 = jx2_11 + Wx2_12; + const auto jx2_13 = jx2_12 + Wx2_13; + + const auto jx2_20 = Wx2_20; + const auto jx2_21 = jx2_20 + Wx2_21; + const auto jx2_22 = jx2_21 + Wx2_22; + const auto jx2_23 = jx2_22 + Wx2_23; + + const auto jx2_30 = Wx2_30; + const auto jx2_31 = jx2_30 + Wx2_31; + const auto jx2_32 = jx2_31 + Wx2_32; + const auto jx2_33 = jx2_32 + Wx2_33; + + i1_minH += N_GHOSTS; + i1_min += N_GHOSTS; + i2_minH += N_GHOSTS; + i2_min += N_GHOSTS; + + // @TODO: not sure about the signs here + const real_t Qdx1dt = -coeff * inv_dt; + const real_t Qdx2dt = -coeff * inv_dt; + const real_t QVx3 = coeff * vp[2]; - const auto jy_3_0 = -Qdydt * Wy_3_0; - const auto jy_3_1 = jy_3_0 - Qdydt * Wy_3_1; - const auto jy_3_2 = jy_3_1 - Qdydt * Wy_3_2; - - /* - Current update - */ auto J_acc = J.access(); - /* - x - component - */ - J_acc(ix_min, iy_min, cur::jx1) += jx_0_0; - J_acc(ix_min, iy_min + 1, cur::jx1) += jx_0_1; - J_acc(ix_min, iy_min + 2, cur::jx1) += jx_0_2; - - J_acc(ix_min + 1, iy_min, cur::jx1) += jx_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx1) += jx_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx1) += jx_1_2; - - if (update_x2) { - J_acc(ix_min + 2, iy_min, cur::jx1) += jx_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx1) += jx_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += jx_2_2; - } - - if (update_y2) { - J_acc(ix_min + 1, iy_min + 3, cur::jx1) += jx_1_3; - J_acc(ix_min, iy_min + 3, cur::jx1) += jx_0_3; - } - - if (update_x2 && update_y2) { - J_acc(ix_min + 2, iy_min + 3, cur::jx1) += jx_2_3; - } - - /* - y - 
component - */ - J_acc(ix_min, iy_min, cur::jx2) += jy_0_0; - J_acc(ix_min + 1, iy_min, cur::jx2) += jy_1_0; - J_acc(ix_min + 2, iy_min, cur::jx2) += jy_2_0; - - J_acc(ix_min, iy_min + 1, cur::jx2) += jy_0_1; - J_acc(ix_min + 1, iy_min + 1, cur::jx2) += jy_1_1; - J_acc(ix_min + 2, iy_min + 1, cur::jx2) += jy_2_1; - - if (update_x2) { - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += jy_3_1; - J_acc(ix_min + 3, iy_min, cur::jx2) += jy_3_0; - } - - if (update_y2) { - J_acc(ix_min, iy_min + 2, cur::jx2) += jy_0_2; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += jy_1_2; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += jy_2_2; - } - - if (update_x2 && update_y2) { - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += jy_3_2; - } - /* - z - component, unsimulated direction - */ - J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; - J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; - J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; - - J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; - - J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; - - if (update_x2) { - J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; - } - - if (update_y2) { - J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; - J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; - J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; - } - if (update_x2 && update_y2) { - J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; - } - + // x1-currents + J_acc(i1_minH + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; + J_acc(i1_minH + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; + J_acc(i1_minH + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; + J_acc(i1_minH + 0, i2_min 
+ 3, cur::jx1) += Qdx1dt * jx1_03; + + J_acc(i1_minH + 1, i2_min + 0, cur::jx1) += Qdx1dt * jx1_10; + J_acc(i1_minH + 1, i2_min + 1, cur::jx1) += Qdx1dt * jx1_11; + J_acc(i1_minH + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; + J_acc(i1_minH + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; + + J_acc(i1_minH + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; + J_acc(i1_minH + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; + J_acc(i1_minH + 2, i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; + J_acc(i1_minH + 2, i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; + + J_acc(i1_minH + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; + J_acc(i1_minH + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; + J_acc(i1_minH + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; + J_acc(i1_minH + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; + + // x2-currents + J_acc(i1_min + 0, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_00; + J_acc(i1_min + 0, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_01; + J_acc(i1_min + 0, i2_minH + 2, cur::jx2) += Qdx2dt * jx2_02; + J_acc(i1_min + 0, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_03; + + J_acc(i1_min + 1, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_10; + J_acc(i1_min + 1, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_11; + J_acc(i1_min + 1, i2_minH + 2, cur::jx2) += Qdx2dt * jx2_12; + J_acc(i1_min + 1, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_13; + + J_acc(i1_min + 2, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_20; + J_acc(i1_min + 2, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_21; + J_acc(i1_min + 2, i2_minH + 2, cur::jx2) += Qdx2dt * jx2_22; + J_acc(i1_min + 2, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_23; + + J_acc(i1_min + 3, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_30; + J_acc(i1_min + 3, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_31; + J_acc(i1_min + 3, i2_minH + 2, cur::jx2) += Qdx2dt * jx2_32; + J_acc(i1_min + 3, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_33; + + // x3-currents + J_acc(i1_min + 0, i2_min + 0, cur::jx3) += QVx3 * Wx3_00; + J_acc(i1_min + 0, i2_min + 1, cur::jx3) += QVx3 * Wx3_01; + J_acc(i1_min + 0, i2_min + 
2, cur::jx3) += QVx3 * Wx3_02; + J_acc(i1_min + 0, i2_min + 3, cur::jx3) += QVx3 * Wx3_03; + + J_acc(i1_min + 1, i2_min + 0, cur::jx3) += QVx3 * Wx3_10; + J_acc(i1_min + 1, i2_min + 1, cur::jx3) += QVx3 * Wx3_11; + J_acc(i1_min + 1, i2_min + 2, cur::jx3) += QVx3 * Wx3_12; + J_acc(i1_min + 1, i2_min + 3, cur::jx3) += QVx3 * Wx3_13; + + J_acc(i1_min + 2, i2_min + 0, cur::jx3) += QVx3 * Wx3_20; + J_acc(i1_min + 2, i2_min + 1, cur::jx3) += QVx3 * Wx3_21; + J_acc(i1_min + 2, i2_min + 2, cur::jx3) += QVx3 * Wx3_22; + J_acc(i1_min + 2, i2_min + 3, cur::jx3) += QVx3 * Wx3_23; + + J_acc(i1_min + 3, i2_min + 0, cur::jx3) += QVx3 * Wx3_30; + J_acc(i1_min + 3, i2_min + 1, cur::jx3) += QVx3 * Wx3_31; + J_acc(i1_min + 3, i2_min + 2, cur::jx3) += QVx3 * Wx3_32; + J_acc(i1_min + 3, i2_min + 3, cur::jx3) += QVx3 * Wx3_33; } else if constexpr (D == Dim::_3D) { - /* - y - direction - */ - - // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3; - // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3; - // indices of the shape function - ncells_t iy_min; - bool update_y2; - // find indices and define shape function - // clang-format off - shape_function_2nd(S0y_0, S0y_1, S0y_2, S0y_3, - S1y_0, S1y_1, S1y_2, S1y_3, - iy_min, update_y2, - i2(p), dx2(p), - i2_prev(p), dx2_prev(p)); - // clang-format on - - /* - y - direction - */ - - // shape function at previous timestep - real_t S0z_0, S0z_1, S0z_2, S0z_3; - // shape function at current timestep - real_t S1z_0, S1z_1, S1z_2, S1z_3; - // indices of the shape function - ncells_t iz_min; - bool update_z2; - // find indices and define shape function - // clang-format off - shape_function_2nd(S0z_0, S0z_1, S0z_2, S0z_3, - S1z_0, S1z_1, S1z_2, S1z_3, - iz_min, update_z2, - i3(p), dx3(p), - i3_prev(p), dx3_prev(p)); - // clang-format on - - // Unrolled calculations for Wx, Wy, and Wz - // clang-format off - const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * 
(S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const 
auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * - 
((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - - const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - - const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - - const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - HALF 
* (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * - ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - - const real_t Qdxdt = coeff * inv_dt; - - const auto jx_0_0_0 = - Qdxdt * Wx_0_0_0; - const auto jx_1_0_0 = jx_0_0_0 - Qdxdt * Wx_1_0_0; - const auto jx_2_0_0 = jx_1_0_0 - Qdxdt * Wx_2_0_0; - const auto jx_0_1_0 = - Qdxdt * Wx_0_1_0; - const auto jx_1_1_0 = jx_0_1_0 - Qdxdt * Wx_1_1_0; - const auto jx_2_1_0 = jx_1_1_0 - Qdxdt * Wx_2_1_0; - const auto jx_0_2_0 = - Qdxdt * Wx_0_2_0; - const auto jx_1_2_0 = jx_0_2_0 - Qdxdt * Wx_1_2_0; - const auto jx_2_2_0 = jx_1_2_0 - Qdxdt * Wx_2_2_0; - const auto jx_0_3_0 = - Qdxdt * Wx_0_3_0; - const auto jx_1_3_0 = jx_0_3_0 - Qdxdt * Wx_1_3_0; - const auto jx_2_3_0 = jx_1_3_0 - Qdxdt * Wx_2_3_0; - - const auto jx_0_0_1 = - Qdxdt * Wx_0_0_1; - const auto jx_1_0_1 = jx_0_0_1 - Qdxdt * Wx_1_0_1; - const auto jx_2_0_1 = jx_1_0_1 - Qdxdt * Wx_2_0_1; - const auto jx_0_1_1 = - Qdxdt * Wx_0_1_1; - const auto jx_1_1_1 = jx_0_1_1 - Qdxdt * Wx_1_1_1; - const auto jx_2_1_1 = jx_1_1_1 - Qdxdt * Wx_2_1_1; - const auto jx_0_2_1 = - Qdxdt * Wx_0_2_1; - const auto jx_1_2_1 = jx_0_2_1 - Qdxdt * Wx_1_2_1; - const auto jx_2_2_1 = jx_1_2_1 - Qdxdt * Wx_2_2_1; - const auto jx_0_3_1 = - Qdxdt * Wx_0_3_1; - const auto jx_1_3_1 = jx_0_3_1 - Qdxdt * Wx_1_3_1; - const auto jx_2_3_1 = jx_1_3_1 - Qdxdt * Wx_2_3_1; - - const auto jx_0_0_2 = - Qdxdt * Wx_0_0_2; - const auto jx_1_0_2 = jx_0_0_2 - Qdxdt * Wx_1_0_2; - const auto jx_2_0_2 = jx_1_0_2 - Qdxdt * Wx_2_0_2; - const auto jx_0_1_2 = - Qdxdt * Wx_0_1_2; - const auto jx_1_1_2 = jx_0_1_2 - Qdxdt * Wx_1_1_2; - const auto jx_2_1_2 = jx_1_1_2 - Qdxdt * Wx_2_1_2; - const auto jx_0_2_2 = - Qdxdt * Wx_0_2_2; - const auto jx_1_2_2 = jx_0_2_2 - Qdxdt * Wx_1_2_2; - const auto jx_2_2_2 = jx_1_2_2 - Qdxdt * Wx_2_2_2; - const auto jx_0_3_2 = - Qdxdt * Wx_0_3_2; - const auto jx_1_3_2 = jx_0_3_2 - Qdxdt * Wx_1_3_2; - const auto jx_2_3_2 = jx_1_3_2 - 
Qdxdt * Wx_2_3_2; - - const auto jx_0_0_3 = - Qdxdt * Wx_0_0_3; - const auto jx_1_0_3 = jx_0_0_3 - Qdxdt * Wx_1_0_3; - const auto jx_2_0_3 = jx_1_0_3 - Qdxdt * Wx_2_0_3; - const auto jx_0_1_3 = - Qdxdt * Wx_0_1_3; - const auto jx_1_1_3 = jx_0_1_3 - Qdxdt * Wx_1_1_3; - const auto jx_2_1_3 = jx_1_1_3 - Qdxdt * Wx_2_1_3; - const auto jx_0_2_3 = - Qdxdt * Wx_0_2_3; - const auto jx_1_2_3 = jx_0_2_3 - Qdxdt * Wx_1_2_3; - const auto jx_2_2_3 = jx_1_2_3 - Qdxdt * Wx_2_2_3; - const auto jx_0_3_3 = - Qdxdt * Wx_0_3_3; - const auto jx_1_3_3 = jx_0_3_3 - Qdxdt * Wx_1_3_3; - const auto jx_2_3_3 = jx_1_3_3 - Qdxdt * Wx_2_3_3; - - /* - y-component - */ - const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - - const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - - const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - const auto Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_2 + 
S1x_0 * S1z_2 + - HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - - const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - const auto Wy_1_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - const auto Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - - const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto 
Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - - const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - - const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + 
- HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - - const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * - (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * - (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * - (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * - (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - - const real_t Qdydt = coeff * inv_dt; - - const auto jy_0_0_0 = - Qdydt * Wy_0_0_0; - const auto jy_0_1_0 = jy_0_0_0 - Qdydt * Wy_0_1_0; - const auto jy_0_2_0 = jy_0_1_0 - Qdydt * Wy_0_2_0; - const auto jy_1_0_0 = - Qdydt * Wy_1_0_0; - const auto jy_1_1_0 = jy_1_0_0 - Qdydt * Wy_1_1_0; - const auto jy_1_2_0 = jy_1_1_0 - Qdydt * Wy_1_2_0; - const auto jy_2_0_0 = - Qdydt * Wy_2_0_0; - const auto jy_2_1_0 = jy_2_0_0 - Qdydt * Wy_2_1_0; - const auto jy_2_2_0 = jy_2_1_0 - Qdydt * Wy_2_2_0; - const auto jy_3_0_0 = - Qdydt * Wy_3_0_0; - const auto jy_3_1_0 = jy_3_0_0 - Qdydt * Wy_3_1_0; - const auto jy_3_2_0 = jy_3_1_0 - Qdydt * Wy_3_2_0; - - const auto jy_0_0_1 = - Qdydt * Wy_0_0_1; - const auto jy_0_1_1 = jy_0_0_1 - Qdydt * Wy_0_1_1; - const auto jy_0_2_1 = jy_0_1_1 - Qdydt * Wy_0_2_1; - const auto jy_1_0_1 = - Qdydt * Wy_1_0_1; - const auto jy_1_1_1 = jy_1_0_1 - Qdydt * Wy_1_1_1; - const auto jy_1_2_1 = jy_1_1_1 - Qdydt * Wy_1_2_1; - const auto jy_2_0_1 = - Qdydt * Wy_2_0_1; - const auto jy_2_1_1 = jy_2_0_1 - Qdydt * Wy_2_1_1; - const auto jy_2_2_1 = jy_2_1_1 - Qdydt * Wy_2_2_1; - const auto jy_3_0_1 = - Qdydt * Wy_3_0_1; - const auto jy_3_1_1 = jy_3_0_1 - Qdydt * 
Wy_3_1_1; - const auto jy_3_2_1 = jy_3_1_1 - Qdydt * Wy_3_2_1; - - const auto jy_0_0_2 = - Qdydt * Wy_0_0_2; - const auto jy_0_1_2 = jy_0_0_2 - Qdydt * Wy_0_1_2; - const auto jy_0_2_2 = jy_0_1_2 - Qdydt * Wy_0_2_2; - const auto jy_1_0_2 = - Qdydt * Wy_1_0_2; - const auto jy_1_1_2 = jy_1_0_2 - Qdydt * Wy_1_1_2; - const auto jy_1_2_2 = jy_1_1_2 - Qdydt * Wy_1_2_2; - const auto jy_2_0_2 = - Qdydt * Wy_2_0_2; - const auto jy_2_1_2 = jy_2_0_2 - Qdydt * Wy_2_1_2; - const auto jy_2_2_2 = jy_2_1_2 - Qdydt * Wy_2_2_2; - const auto jy_3_0_2 = - Qdydt * Wy_3_0_2; - const auto jy_3_1_2 = jy_3_0_2 - Qdydt * Wy_3_1_2; - const auto jy_3_2_2 = jy_3_1_2 - Qdydt * Wy_3_2_2; - - const auto jy_0_0_3 = - Qdydt * Wy_0_0_3; - const auto jy_0_1_3 = jy_0_0_3 - Qdydt * Wy_0_1_3; - const auto jy_0_2_3 = jy_0_1_3 - Qdydt * Wy_0_2_3; - const auto jy_1_0_3 = - Qdydt * Wy_1_0_3; - const auto jy_1_1_3 = jy_1_0_3 - Qdydt * Wy_1_1_3; - const auto jy_1_2_3 = jy_1_1_3 - Qdydt * Wy_1_2_3; - const auto jy_2_0_3 = - Qdydt * Wy_2_0_3; - const auto jy_2_1_3 = jy_2_0_3 - Qdydt * Wy_2_1_3; - const auto jy_2_2_3 = jy_2_1_3 - Qdydt * Wy_2_2_3; - const auto jy_3_0_3 = - Qdydt * Wy_3_0_3; - const auto jy_3_1_3 = jy_3_0_3 - Qdydt * Wy_3_1_3; - const auto jy_3_2_3 = jy_3_1_3 - Qdydt * Wy_3_2_3; - - /* - z - component - */ - const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - - const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_0 * 
S0y_1 + S1x_0 * S1y_1 + - HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - - const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - - const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - - // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 - const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - - const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - - const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 
* S0y_2 + S1x_1 * S1y_2 + - HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - - const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - - // Unrolled loop for Wz[i][j][k] with i = 2 and interp_order + 2 = 4 - const auto Wz_2_0_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_2 * S0y_0 + S1x_2 * S1y_0 + - HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); - const auto Wz_2_0_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_2 * S0y_0 + S1x_2 * S1y_0 + - HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); - const auto Wz_2_0_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_2 * S0y_0 + S1x_2 * S1y_0 + - HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); - - const auto Wz_2_1_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_2 * S0y_1 + S1x_2 * S1y_1 + - HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); - const auto Wz_2_1_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_2 * S0y_1 + S1x_2 * S1y_1 + - HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); - const auto Wz_2_1_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_2 * S0y_1 + S1x_2 * S1y_1 + - HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); - - const auto Wz_2_2_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_2 * S0y_2 + S1x_2 * S1y_2 + - HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); - const auto Wz_2_2_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_2 * S0y_2 + S1x_2 * S1y_2 + - HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); - const auto Wz_2_2_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_2 * S0y_2 + S1x_2 * S1y_2 + - HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); - - const auto Wz_2_3_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_2 * S0y_3 + S1x_2 * S1y_3 + - HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); - const auto Wz_2_3_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_2 * S0y_3 + S1x_2 * S1y_3 + - HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); - const auto Wz_2_3_2 = THIRD * (S1z_2 - S0z_2) * - 
(S0x_2 * S0y_3 + S1x_2 * S1y_3 + - HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); - - // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 - const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - - const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - - const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - - const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * - (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * - (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * - (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - - const real_t Qdzdt = coeff * inv_dt; - - const auto jz_0_0_0 = - Qdzdt * Wz_0_0_0; - const auto jz_0_0_1 = jz_0_0_0 - Qdzdt * Wz_0_0_1; - const auto jz_0_0_2 = jz_0_0_1 - Qdzdt * Wz_0_0_2; - const auto jz_0_1_0 = - Qdzdt * Wz_0_1_0; - const auto jz_0_1_1 = jz_0_1_0 - Qdzdt * Wz_0_1_1; - const auto 
jz_0_1_2 = jz_0_1_1 - Qdzdt * Wz_0_1_2; - const auto jz_0_2_0 = - Qdzdt * Wz_0_2_0; - const auto jz_0_2_1 = jz_0_2_0 - Qdzdt * Wz_0_2_1; - const auto jz_0_2_2 = jz_0_2_1 - Qdzdt * Wz_0_2_2; - const auto jz_0_3_0 = - Qdzdt * Wz_0_3_0; - const auto jz_0_3_1 = jz_0_3_0 - Qdzdt * Wz_0_3_1; - const auto jz_0_3_2 = jz_0_3_1 - Qdzdt * Wz_0_3_2; - - const auto jz_1_0_0 = - Qdzdt * Wz_1_0_0; - const auto jz_1_0_1 = jz_1_0_0 - Qdzdt * Wz_1_0_1; - const auto jz_1_0_2 = jz_1_0_1 - Qdzdt * Wz_1_0_2; - const auto jz_1_1_0 = - Qdzdt * Wz_1_1_0; - const auto jz_1_1_1 = jz_1_1_0 - Qdzdt * Wz_1_1_1; - const auto jz_1_1_2 = jz_1_1_1 - Qdzdt * Wz_1_1_2; - const auto jz_1_2_0 = - Qdzdt * Wz_1_2_0; - const auto jz_1_2_1 = jz_1_2_0 - Qdzdt * Wz_1_2_1; - const auto jz_1_2_2 = jz_1_2_1 - Qdzdt * Wz_1_2_2; - const auto jz_1_3_0 = - Qdzdt * Wz_1_3_0; - const auto jz_1_3_1 = jz_1_3_0 - Qdzdt * Wz_1_3_1; - const auto jz_1_3_2 = jz_1_3_1 - Qdzdt * Wz_1_3_2; - - const auto jz_2_0_0 = - Qdzdt * Wz_2_0_0; - const auto jz_2_0_1 = jz_2_0_0 - Qdzdt * Wz_2_0_1; - const auto jz_2_0_2 = jz_2_0_1 - Qdzdt * Wz_2_0_2; - const auto jz_2_1_0 = - Qdzdt * Wz_2_1_0; - const auto jz_2_1_1 = jz_2_1_0 - Qdzdt * Wz_2_1_1; - const auto jz_2_1_2 = jz_2_1_1 - Qdzdt * Wz_2_1_2; - const auto jz_2_2_0 = - Qdzdt * Wz_2_2_0; - const auto jz_2_2_1 = jz_2_2_0 - Qdzdt * Wz_2_2_1; - const auto jz_2_2_2 = jz_2_2_1 - Qdzdt * Wz_2_2_2; - const auto jz_2_3_0 = - Qdzdt * Wz_2_3_0; - const auto jz_2_3_1 = jz_2_3_0 - Qdzdt * Wz_2_3_1; - const auto jz_2_3_2 = jz_2_3_1 - Qdzdt * Wz_2_3_2; - - const auto jz_3_0_0 = - Qdzdt * Wz_3_0_0; - const auto jz_3_0_1 = jz_3_0_0 - Qdzdt * Wz_3_0_1; - const auto jz_3_0_2 = jz_3_0_1 - Qdzdt * Wz_3_0_2; - const auto jz_3_1_0 = - Qdzdt * Wz_3_1_0; - const auto jz_3_1_1 = jz_3_1_0 - Qdzdt * Wz_3_1_1; - const auto jz_3_1_2 = jz_3_1_1 - Qdzdt * Wz_3_1_2; - const auto jz_3_2_0 = - Qdzdt * Wz_3_2_0; - const auto jz_3_2_1 = jz_3_2_0 - Qdzdt * Wz_3_2_1; - const auto jz_3_2_2 = jz_3_2_1 - Qdzdt * Wz_3_2_2; - 
const auto jz_3_3_0 = - Qdzdt * Wz_3_3_0; - const auto jz_3_3_1 = jz_3_3_0 - Qdzdt * Wz_3_3_1; - const auto jz_3_3_2 = jz_3_3_1 - Qdzdt * Wz_3_3_2; - - - /* - Current update - */ - auto J_acc = J.access(); - - J_acc(ix_min, iy_min, iz_min, cur::jx1) += jx_0_0_0; - J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += jx_0_0_1; - J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += jx_0_0_2; - J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += jx_0_1_0; - J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += jx_0_1_1; - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += jx_0_1_2; - J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += jx_0_2_0; - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += jx_0_2_1; - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += jx_0_2_2; - J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += jx_1_0_0; - J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += jx_1_0_1; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += jx_1_0_2; - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += jx_1_1_0; - J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += jx_1_1_1; - J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += jx_1_1_2; - J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += jx_1_2_0; - J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += jx_1_2_1; - J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += jx_1_2_2; - - if (update_x2) - { - J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += jx_2_0_0; - J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += jx_2_0_1; - J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += jx_2_0_2; - J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += jx_2_1_0; - J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += jx_2_1_1; - J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += jx_2_1_2; - J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += jx_2_2_0; - J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += jx_2_2_1; - J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += jx_2_2_2; - - if (update_y2) - { - J_acc(ix_min + 2, iy_min + 3, iz_min, 
cur::jx1) += jx_2_3_0; - J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += jx_2_3_1; - J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += jx_2_3_2; - } - - if (update_z2) - { - J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += jx_2_0_3; - J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += jx_2_1_3; - J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += jx_2_2_3; - - if (update_y2) - { - J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += jx_2_3_3; - } - } - } + // /* + // y - direction + // */ // - if (update_y2) - { - J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += jx_0_3_0; - J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += jx_0_3_1; - J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += jx_0_3_2; - J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += jx_1_3_0; - J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += jx_1_3_1; - J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += jx_1_3_2; - } - - if (update_z2) - { - J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += jx_0_0_3; - J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += jx_0_1_3; - J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += jx_0_2_3; - J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += jx_1_0_3; - J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += jx_1_1_3; - J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += jx_1_2_3; - - if (update_y2) - { - J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += jx_0_3_3; - J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += jx_1_3_3; - } - } - - - /* - y-component - */ - J_acc(ix_min, iy_min, iz_min, cur::jx2) += jy_0_0_0; - J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += jy_0_0_1; - J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += jy_0_0_2; - J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += jy_0_1_0; - J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += jy_0_1_1; - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += jy_0_1_2; - J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += jy_1_0_0; - J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += 
jy_1_0_1; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += jy_1_0_2; - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += jy_1_1_0; - J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += jy_1_1_1; - J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += jy_1_1_2; - J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += jy_2_0_0; - J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += jy_2_0_1; - J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += jy_2_0_2; - J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += jy_2_1_0; - J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += jy_2_1_1; - J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += jy_2_1_2; - - if (update_x2) - { - J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += jy_3_0_0; - J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += jy_3_0_1; - J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += jy_3_0_2; - J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += jy_3_1_0; - J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += jy_3_1_1; - J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += jy_3_1_2; - - if (update_z2) - { - J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += jy_3_0_3; - J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += jy_3_1_3; - } - } - - if (update_y2) - { - J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += jy_0_2_0; - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += jy_0_2_1; - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += jy_0_2_2; - J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += jy_1_2_0; - J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += jy_1_2_1; - J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += jy_1_2_2; - J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += jy_2_2_0; - J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += jy_2_2_1; - J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += jy_2_2_2; - - if (update_x2) - { - J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += jy_3_2_0; - J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += jy_3_2_1; - J_acc(ix_min + 3, iy_min 
+ 2, iz_min + 2, cur::jx2) += jy_3_2_2; - - if (update_z2) - { - J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += jy_2_2_3; - J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += jy_3_2_3; - } - } - - if (update_z2) - { - J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += jy_0_2_3; - J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += jy_1_2_3; - } - } - - if (update_z2) - { - J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += jy_0_0_3; - J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += jy_0_1_3; - J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += jy_1_0_3; - J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += jy_1_1_3; - J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += jy_2_0_3; - J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += jy_2_1_3; - } - - /* - z-component - */ - J_acc(ix_min, iy_min, iz_min, cur::jx3) += jz_0_0_0; - J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += jz_0_0_1; - J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += jz_0_1_0; - J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += jz_0_1_1; - J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += jz_0_2_0; - J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += jz_0_2_1; - J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += jz_1_0_0; - J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += jz_1_0_1; - J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += jz_1_1_0; - J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += jz_1_1_1; - J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += jz_1_2_0; - J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += jz_1_2_1; - J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += jz_2_0_0; - J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += jz_2_0_1; - J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += jz_2_1_0; - J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += jz_2_1_1; - J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += jz_2_2_0; - J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += jz_2_2_1; - - if (update_x2) - { - J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += 
jz_3_0_0; - J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += jz_3_0_1; - J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += jz_3_1_0; - J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += jz_3_1_1; - J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += jz_3_2_0; - J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += jz_3_2_1; - J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += jz_3_3_0; - J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += jz_3_3_1; - } - - if (update_y2) - { - J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += jz_0_3_0; - J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += jz_0_3_1; - J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += jz_1_3_0; - J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += jz_1_3_1; - J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += jz_2_3_0; - J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += jz_2_3_1; - } - - if (update_z2) - { - J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += jz_0_0_2; - J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += jz_0_1_2; - J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += jz_0_2_2; - J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += jz_1_0_2; - J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += jz_1_1_2; - J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += jz_1_2_2; - J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += jz_2_0_2; - J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += jz_2_1_2; - J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += jz_2_2_2; - - if (update_x2) - { - J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += jz_3_0_2; - J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += jz_3_1_2; - J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += jz_3_2_2; - - if (update_y2) - { - J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += jz_3_3_2; - } - } - - if (update_y2) - { - J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += jz_0_3_2; - J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += jz_1_3_2; - J_acc(ix_min + 2, iy_min + 3, iz_min + 2, 
cur::jx3) += jz_2_3_2; - } - } + // // shape function at previous timestep + // real_t S0y_0, S0y_1, S0y_2, S0y_3; + // // shape function at current timestep + // real_t S1y_0, S1y_1, S1y_2, S1y_3; + // // indices of the shape function + // ncells_t iy_min; + // bool update_y2; + // // find indices and define shape function + // // clang-format off + // shape_function_2nd(S0y_0, S0y_1, S0y_2, S0y_3, + // S1y_0, S1y_1, S1y_2, S1y_3, + // iy_min, update_y2, + // i2(p), dx2(p), + // i2_prev(p), dx2_prev(p)); + // // clang-format on + // + // /* + // z - direction + // */ + // + // // shape function at previous timestep + // real_t S0z_0, S0z_1, S0z_2, S0z_3; + // // shape function at current timestep + // real_t S1z_0, S1z_1, S1z_2, S1z_3; + // // indices of the shape function + // ncells_t iz_min; + // bool update_z2; + // // find indices and define shape function + // // clang-format off + // shape_function_2nd(S0z_0, S0z_1, S0z_2, S0z_3, + // S1z_0, S1z_1, S1z_2, S1z_3, + // iz_min, update_z2, + // i3(p), dx3(p), + // i3_prev(p), dx3_prev(p)); + // // clang-format on + // + // // Unrolled calculations for Wx, Wy, and Wz + // // clang-format off + // const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + // const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + // const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + // const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + // + // const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + // const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + // HALF * (S0z_1 * S1y_1 + S0y_1 *
S1z_1)); + // const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + // const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + // + // const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + // const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + // const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + // const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + // + // const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + // const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + // const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + // const auto Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * + // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + // + // const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + // const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + // const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + // const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + // + // const auto Wx_1_1_0 
= THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + // const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + // const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + // const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + // + // const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + // const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + // const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + // const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + // + // const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + // const auto Wx_1_3_1 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + // const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + // const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * + // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + // + // const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + + // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); + // const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + + // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); + // const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * + // 
((S0y_0 * S0z_2 + S1y_0 * S1z_2) + + // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); + // const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + + // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); + // + // const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + + // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); + // const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + + // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); + // const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + + // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); + // const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + + // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); + // + // const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + + // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); + // const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + + // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); + // const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + + // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); + // const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + + // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); + // + // const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + + // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); + // const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + + // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); + // const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + + // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); + // const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * + // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + + // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); + // + // const real_t Qdxdt = coeff * inv_dt; + // + // const auto jx_0_0_0 = - Qdxdt * 
Wx_0_0_0; + // const auto jx_1_0_0 = jx_0_0_0 - Qdxdt * Wx_1_0_0; + // const auto jx_2_0_0 = jx_1_0_0 - Qdxdt * Wx_2_0_0; + // const auto jx_0_1_0 = - Qdxdt * Wx_0_1_0; + // const auto jx_1_1_0 = jx_0_1_0 - Qdxdt * Wx_1_1_0; + // const auto jx_2_1_0 = jx_1_1_0 - Qdxdt * Wx_2_1_0; + // const auto jx_0_2_0 = - Qdxdt * Wx_0_2_0; + // const auto jx_1_2_0 = jx_0_2_0 - Qdxdt * Wx_1_2_0; + // const auto jx_2_2_0 = jx_1_2_0 - Qdxdt * Wx_2_2_0; + // const auto jx_0_3_0 = - Qdxdt * Wx_0_3_0; + // const auto jx_1_3_0 = jx_0_3_0 - Qdxdt * Wx_1_3_0; + // const auto jx_2_3_0 = jx_1_3_0 - Qdxdt * Wx_2_3_0; + // + // const auto jx_0_0_1 = - Qdxdt * Wx_0_0_1; + // const auto jx_1_0_1 = jx_0_0_1 - Qdxdt * Wx_1_0_1; + // const auto jx_2_0_1 = jx_1_0_1 - Qdxdt * Wx_2_0_1; + // const auto jx_0_1_1 = - Qdxdt * Wx_0_1_1; + // const auto jx_1_1_1 = jx_0_1_1 - Qdxdt * Wx_1_1_1; + // const auto jx_2_1_1 = jx_1_1_1 - Qdxdt * Wx_2_1_1; + // const auto jx_0_2_1 = - Qdxdt * Wx_0_2_1; + // const auto jx_1_2_1 = jx_0_2_1 - Qdxdt * Wx_1_2_1; + // const auto jx_2_2_1 = jx_1_2_1 - Qdxdt * Wx_2_2_1; + // const auto jx_0_3_1 = - Qdxdt * Wx_0_3_1; + // const auto jx_1_3_1 = jx_0_3_1 - Qdxdt * Wx_1_3_1; + // const auto jx_2_3_1 = jx_1_3_1 - Qdxdt * Wx_2_3_1; + // + // const auto jx_0_0_2 = - Qdxdt * Wx_0_0_2; + // const auto jx_1_0_2 = jx_0_0_2 - Qdxdt * Wx_1_0_2; + // const auto jx_2_0_2 = jx_1_0_2 - Qdxdt * Wx_2_0_2; + // const auto jx_0_1_2 = - Qdxdt * Wx_0_1_2; + // const auto jx_1_1_2 = jx_0_1_2 - Qdxdt * Wx_1_1_2; + // const auto jx_2_1_2 = jx_1_1_2 - Qdxdt * Wx_2_1_2; + // const auto jx_0_2_2 = - Qdxdt * Wx_0_2_2; + // const auto jx_1_2_2 = jx_0_2_2 - Qdxdt * Wx_1_2_2; + // const auto jx_2_2_2 = jx_1_2_2 - Qdxdt * Wx_2_2_2; + // const auto jx_0_3_2 = - Qdxdt * Wx_0_3_2; + // const auto jx_1_3_2 = jx_0_3_2 - Qdxdt * Wx_1_3_2; + // const auto jx_2_3_2 = jx_1_3_2 - Qdxdt * Wx_2_3_2; + // + // const auto jx_0_0_3 = - Qdxdt * Wx_0_0_3; + // const auto jx_1_0_3 = jx_0_0_3 - Qdxdt * Wx_1_0_3; + // const 
auto jx_2_0_3 = jx_1_0_3 - Qdxdt * Wx_2_0_3; + // const auto jx_0_1_3 = - Qdxdt * Wx_0_1_3; + // const auto jx_1_1_3 = jx_0_1_3 - Qdxdt * Wx_1_1_3; + // const auto jx_2_1_3 = jx_1_1_3 - Qdxdt * Wx_2_1_3; + // const auto jx_0_2_3 = - Qdxdt * Wx_0_2_3; + // const auto jx_1_2_3 = jx_0_2_3 - Qdxdt * Wx_1_2_3; + // const auto jx_2_2_3 = jx_1_2_3 - Qdxdt * Wx_2_2_3; + // const auto jx_0_3_3 = - Qdxdt * Wx_0_3_3; + // const auto jx_1_3_3 = jx_0_3_3 - Qdxdt * Wx_1_3_3; + // const auto jx_2_3_3 = jx_1_3_3 - Qdxdt * Wx_2_3_3; + // + // /* + // y-component + // */ + // const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + // const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + // const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + // const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + // + // const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + // const auto Wy_0_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + // const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + + // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + // const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + // + // const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + + // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); + // const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + + // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); + // const auto Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * + // 
(S0x_0 * S0z_2 + S1x_0 * S1z_2 + + // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); + // const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + + // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); + // + // const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + // const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + // const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + // const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + // + // const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + // const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + // const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + // const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + // + // const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + + // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); + // const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + + // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); + // const auto Wy_1_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + + // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); + // const auto Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + + // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); + // + // const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + 
S0x_2 * S1z_0)); + // const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + + // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); + // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + + // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); + // const auto Wy_2_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + + // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); + // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + + // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); + // + // const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_0_3 = THIRD * (S1y_0 - 
S0y_0) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + + // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); + // const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + + // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); + // const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + + // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); + // const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * + // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + + // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); + // + // const real_t Qdydt = coeff * inv_dt; + // + // const auto jy_0_0_0 = - Qdydt * Wy_0_0_0; + // const auto jy_0_1_0 = jy_0_0_0 - Qdydt * Wy_0_1_0; + // const auto jy_0_2_0 = jy_0_1_0 - Qdydt * Wy_0_2_0; + // const auto jy_1_0_0 = - Qdydt * Wy_1_0_0; + // const auto jy_1_1_0 = jy_1_0_0 - Qdydt * Wy_1_1_0; + // const auto jy_1_2_0 = jy_1_1_0 - Qdydt * Wy_1_2_0; + // const auto jy_2_0_0 = - Qdydt * Wy_2_0_0; + // const auto jy_2_1_0 = jy_2_0_0 - Qdydt * Wy_2_1_0; + // const auto jy_2_2_0 = jy_2_1_0 - Qdydt * Wy_2_2_0; + // const auto jy_3_0_0 = - Qdydt * Wy_3_0_0; + // const auto jy_3_1_0 = jy_3_0_0 - Qdydt * Wy_3_1_0; + // const auto jy_3_2_0 = jy_3_1_0 - Qdydt * Wy_3_2_0; + // + // const auto jy_0_0_1 = - Qdydt * Wy_0_0_1; + // const auto jy_0_1_1 = jy_0_0_1 - Qdydt * Wy_0_1_1; + // 
const auto jy_0_2_1 = jy_0_1_1 - Qdydt * Wy_0_2_1; + // const auto jy_1_0_1 = - Qdydt * Wy_1_0_1; + // const auto jy_1_1_1 = jy_1_0_1 - Qdydt * Wy_1_1_1; + // const auto jy_1_2_1 = jy_1_1_1 - Qdydt * Wy_1_2_1; + // const auto jy_2_0_1 = - Qdydt * Wy_2_0_1; + // const auto jy_2_1_1 = jy_2_0_1 - Qdydt * Wy_2_1_1; + // const auto jy_2_2_1 = jy_2_1_1 - Qdydt * Wy_2_2_1; + // const auto jy_3_0_1 = - Qdydt * Wy_3_0_1; + // const auto jy_3_1_1 = jy_3_0_1 - Qdydt * Wy_3_1_1; + // const auto jy_3_2_1 = jy_3_1_1 - Qdydt * Wy_3_2_1; + // + // const auto jy_0_0_2 = - Qdydt * Wy_0_0_2; + // const auto jy_0_1_2 = jy_0_0_2 - Qdydt * Wy_0_1_2; + // const auto jy_0_2_2 = jy_0_1_2 - Qdydt * Wy_0_2_2; + // const auto jy_1_0_2 = - Qdydt * Wy_1_0_2; + // const auto jy_1_1_2 = jy_1_0_2 - Qdydt * Wy_1_1_2; + // const auto jy_1_2_2 = jy_1_1_2 - Qdydt * Wy_1_2_2; + // const auto jy_2_0_2 = - Qdydt * Wy_2_0_2; + // const auto jy_2_1_2 = jy_2_0_2 - Qdydt * Wy_2_1_2; + // const auto jy_2_2_2 = jy_2_1_2 - Qdydt * Wy_2_2_2; + // const auto jy_3_0_2 = - Qdydt * Wy_3_0_2; + // const auto jy_3_1_2 = jy_3_0_2 - Qdydt * Wy_3_1_2; + // const auto jy_3_2_2 = jy_3_1_2 - Qdydt * Wy_3_2_2; + // + // const auto jy_0_0_3 = - Qdydt * Wy_0_0_3; + // const auto jy_0_1_3 = jy_0_0_3 - Qdydt * Wy_0_1_3; + // const auto jy_0_2_3 = jy_0_1_3 - Qdydt * Wy_0_2_3; + // const auto jy_1_0_3 = - Qdydt * Wy_1_0_3; + // const auto jy_1_1_3 = jy_1_0_3 - Qdydt * Wy_1_1_3; + // const auto jy_1_2_3 = jy_1_1_3 - Qdydt * Wy_1_2_3; + // const auto jy_2_0_3 = - Qdydt * Wy_2_0_3; + // const auto jy_2_1_3 = jy_2_0_3 - Qdydt * Wy_2_1_3; + // const auto jy_2_2_3 = jy_2_1_3 - Qdydt * Wy_2_2_3; + // const auto jy_3_0_3 = - Qdydt * Wy_3_0_3; + // const auto jy_3_1_3 = jy_3_0_3 - Qdydt * Wy_3_1_3; + // const auto jy_3_2_3 = jy_3_1_3 - Qdydt * Wy_3_2_3; + // + // /* + // z - component + // */ + // const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const 
auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + + // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); + // + // const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + + // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); + // + // const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + + // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); + // + // const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + + // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); + // + // // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 + // const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); + // const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + + // HALF * (S0x_1 * S1y_0 + S0y_0 * 
S1x_1)); + // + // const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + + // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); + // + // const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + + // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); + // + // const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + + // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); + // + // // Unrolled loop for Wz[i][j][k] with i = 2 and interp_order + 2 = 4 + // const auto Wz_2_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + // const auto Wz_2_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + // const auto Wz_2_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + + // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); + // + // const auto Wz_2_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + // HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + // const auto Wz_2_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + // HALF * (S0x_2 
* S1y_1 + S0y_1 * S1x_2)); + // const auto Wz_2_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + + // HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); + // + // const auto Wz_2_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + // const auto Wz_2_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + // const auto Wz_2_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + + // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); + // + // const auto Wz_2_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + // HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + // const auto Wz_2_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + // HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + // const auto Wz_2_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + + // HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); + // + // // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 + // const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + + // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); + // + // const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + + // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); + // + // const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + 
// HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + + // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); + // + // const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * + // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + + // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); + // + // const real_t Qdzdt = coeff * inv_dt; + // + // const auto jz_0_0_0 = - Qdzdt * Wz_0_0_0; + // const auto jz_0_0_1 = jz_0_0_0 - Qdzdt * Wz_0_0_1; + // const auto jz_0_0_2 = jz_0_0_1 - Qdzdt * Wz_0_0_2; + // const auto jz_0_1_0 = - Qdzdt * Wz_0_1_0; + // const auto jz_0_1_1 = jz_0_1_0 - Qdzdt * Wz_0_1_1; + // const auto jz_0_1_2 = jz_0_1_1 - Qdzdt * Wz_0_1_2; + // const auto jz_0_2_0 = - Qdzdt * Wz_0_2_0; + // const auto jz_0_2_1 = jz_0_2_0 - Qdzdt * Wz_0_2_1; + // const auto jz_0_2_2 = jz_0_2_1 - Qdzdt * Wz_0_2_2; + // const auto jz_0_3_0 = - Qdzdt * Wz_0_3_0; + // const auto jz_0_3_1 = jz_0_3_0 - Qdzdt * Wz_0_3_1; + // const auto jz_0_3_2 = jz_0_3_1 - Qdzdt * Wz_0_3_2; + // + // const auto jz_1_0_0 = - Qdzdt * Wz_1_0_0; + // const auto jz_1_0_1 = jz_1_0_0 - Qdzdt * Wz_1_0_1; + // const auto jz_1_0_2 = jz_1_0_1 - Qdzdt * Wz_1_0_2; + // const auto jz_1_1_0 = - Qdzdt * Wz_1_1_0; + // const auto jz_1_1_1 = jz_1_1_0 - Qdzdt * Wz_1_1_1; + // const auto jz_1_1_2 = jz_1_1_1 - Qdzdt * Wz_1_1_2; + // const auto jz_1_2_0 = - Qdzdt * Wz_1_2_0; + // const auto jz_1_2_1 = jz_1_2_0 - Qdzdt * Wz_1_2_1; + // const auto jz_1_2_2 = jz_1_2_1 - Qdzdt * Wz_1_2_2; + // const auto jz_1_3_0 = - Qdzdt * Wz_1_3_0; + // const auto jz_1_3_1 = jz_1_3_0 - Qdzdt * 
Wz_1_3_1; + // const auto jz_1_3_2 = jz_1_3_1 - Qdzdt * Wz_1_3_2; + // + // const auto jz_2_0_0 = - Qdzdt * Wz_2_0_0; + // const auto jz_2_0_1 = jz_2_0_0 - Qdzdt * Wz_2_0_1; + // const auto jz_2_0_2 = jz_2_0_1 - Qdzdt * Wz_2_0_2; + // const auto jz_2_1_0 = - Qdzdt * Wz_2_1_0; + // const auto jz_2_1_1 = jz_2_1_0 - Qdzdt * Wz_2_1_1; + // const auto jz_2_1_2 = jz_2_1_1 - Qdzdt * Wz_2_1_2; + // const auto jz_2_2_0 = - Qdzdt * Wz_2_2_0; + // const auto jz_2_2_1 = jz_2_2_0 - Qdzdt * Wz_2_2_1; + // const auto jz_2_2_2 = jz_2_2_1 - Qdzdt * Wz_2_2_2; + // const auto jz_2_3_0 = - Qdzdt * Wz_2_3_0; + // const auto jz_2_3_1 = jz_2_3_0 - Qdzdt * Wz_2_3_1; + // const auto jz_2_3_2 = jz_2_3_1 - Qdzdt * Wz_2_3_2; + // + // const auto jz_3_0_0 = - Qdzdt * Wz_3_0_0; + // const auto jz_3_0_1 = jz_3_0_0 - Qdzdt * Wz_3_0_1; + // const auto jz_3_0_2 = jz_3_0_1 - Qdzdt * Wz_3_0_2; + // const auto jz_3_1_0 = - Qdzdt * Wz_3_1_0; + // const auto jz_3_1_1 = jz_3_1_0 - Qdzdt * Wz_3_1_1; + // const auto jz_3_1_2 = jz_3_1_1 - Qdzdt * Wz_3_1_2; + // const auto jz_3_2_0 = - Qdzdt * Wz_3_2_0; + // const auto jz_3_2_1 = jz_3_2_0 - Qdzdt * Wz_3_2_1; + // const auto jz_3_2_2 = jz_3_2_1 - Qdzdt * Wz_3_2_2; + // const auto jz_3_3_0 = - Qdzdt * Wz_3_3_0; + // const auto jz_3_3_1 = jz_3_3_0 - Qdzdt * Wz_3_3_1; + // const auto jz_3_3_2 = jz_3_3_1 - Qdzdt * Wz_3_3_2; + // + // + // /* + // Current update + // */ + // auto J_acc = J.access(); + // + // J_acc(ix_min, iy_min, iz_min, cur::jx1) += jx_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += jx_0_0_1; + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += jx_0_0_2; + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += jx_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += jx_0_1_1; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += jx_0_1_2; + // J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += jx_0_2_0; + // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += jx_0_2_1; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += jx_0_2_2; 
+ // J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += jx_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += jx_1_0_1; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += jx_1_0_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += jx_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += jx_1_1_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += jx_1_1_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += jx_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += jx_1_2_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += jx_1_2_2; + // + // if (update_x2) + // { + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += jx_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += jx_2_0_1; + // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += jx_2_0_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += jx_2_1_0; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += jx_2_1_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += jx_2_1_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += jx_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += jx_2_2_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += jx_2_2_2; + // + // if (update_y2) + // { + // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += jx_2_3_0; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += jx_2_3_1; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += jx_2_3_2; + // } + // + // if (update_z2) + // { + // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += jx_2_0_3; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += jx_2_1_3; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += jx_2_2_3; + // + // if (update_y2) + // { + // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += jx_2_3_3; + // } + // } + // } + // // + // if (update_y2) + // { + // J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += jx_0_3_0; + // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) 
+= jx_0_3_1; + // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += jx_0_3_2; + // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += jx_1_3_0; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += jx_1_3_1; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += jx_1_3_2; + // } + // + // if (update_z2) + // { + // J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += jx_0_0_3; + // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += jx_0_1_3; + // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += jx_0_2_3; + // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += jx_1_0_3; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += jx_1_1_3; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += jx_1_2_3; + // + // if (update_y2) + // { + // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += jx_0_3_3; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += jx_1_3_3; + // } + // } + // + // + // /* + // y-component + // */ + // J_acc(ix_min, iy_min, iz_min, cur::jx2) += jy_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += jy_0_0_1; + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += jy_0_0_2; + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += jy_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += jy_0_1_1; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx2) += jy_0_1_2; + // J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += jy_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += jy_1_0_1; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += jy_1_0_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += jy_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += jy_1_1_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += jy_1_1_2; + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += jy_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += jy_2_0_1; + // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += jy_2_0_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += jy_2_1_0; + // 
J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += jy_2_1_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += jy_2_1_2; + // + // if (update_x2) + // { + // J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += jy_3_0_0; + // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += jy_3_0_1; + // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += jy_3_0_2; + // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += jy_3_1_0; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += jy_3_1_1; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += jy_3_1_2; + // + // if (update_z2) + // { + // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += jy_3_0_3; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += jy_3_1_3; + // } + // } + // + // if (update_y2) + // { + // J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += jy_0_2_0; + // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += jy_0_2_1; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += jy_0_2_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += jy_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += jy_1_2_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += jy_1_2_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += jy_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx2) += jy_2_2_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += jy_2_2_2; + // + // if (update_x2) + // { + // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += jy_3_2_0; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += jy_3_2_1; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += jy_3_2_2; + // + // if (update_z2) + // { + // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += jy_2_2_3; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += jy_3_2_3; + // } + // } + // + // if (update_z2) + // { + // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += jy_0_2_3; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += jy_1_2_3; + // } + // } + // + 
// if (update_z2) + // { + // J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += jy_0_0_3; + // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += jy_0_1_3; + // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += jy_1_0_3; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += jy_1_1_3; + // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += jy_2_0_3; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += jy_2_1_3; + // } + // + // /* + // z-component + // */ + // J_acc(ix_min, iy_min, iz_min, cur::jx3) += jz_0_0_0; + // J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += jz_0_0_1; + // J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += jz_0_1_0; + // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += jz_0_1_1; + // J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += jz_0_2_0; + // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += jz_0_2_1; + // J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += jz_1_0_0; + // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += jz_1_0_1; + // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += jz_1_1_0; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += jz_1_1_1; + // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += jz_1_2_0; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += jz_1_2_1; + // J_acc(ix_min + 2, iy_min, iz_min, cur::jx3) += jz_2_0_0; + // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += jz_2_0_1; + // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += jz_2_1_0; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += jz_2_1_1; + // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += jz_2_2_0; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += jz_2_2_1; + // + // if (update_x2) + // { + // J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += jz_3_0_0; + // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += jz_3_0_1; + // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += jz_3_1_0; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += jz_3_1_1; + // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += 
jz_3_2_0; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += jz_3_2_1; + // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += jz_3_3_0; + // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += jz_3_3_1; + // } + // + // if (update_y2) + // { + // J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += jz_0_3_0; + // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += jz_0_3_1; + // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += jz_1_3_0; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += jz_1_3_1; + // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += jz_2_3_0; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += jz_2_3_1; + // } + // + // if (update_z2) + // { + // J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += jz_0_0_2; + // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += jz_0_1_2; + // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += jz_0_2_2; + // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += jz_1_0_2; + // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += jz_1_1_2; + // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += jz_1_2_2; + // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += jz_2_0_2; + // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += jz_2_1_2; + // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx3) += jz_2_2_2; + // + // if (update_x2) + // { + // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += jz_3_0_2; + // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += jz_3_1_2; + // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += jz_3_2_2; + // + // if (update_y2) + // { + // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += jz_3_3_2; + // } + // } + // + // if (update_y2) + // { + // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += jz_0_3_2; + // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += jz_1_3_2; + // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += jz_2_3_2; + // } + // } // clang-format on } // dimension @@ -2784,13 +2784,13 @@ namespace kernel { } } } - - } else 
{ // order - raise::KernelError(HERE, "Unsupported interpolation order"); - } + + } else { // order + raise::KernelError(HERE, "Unsupported interpolation order"); } - }; - } // namespace kernel + } + }; +} // namespace kernel #undef i_di_to_Xi diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 980acca55..7d267ce9c 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -475,9 +475,13 @@ namespace kernel::sr { vec_t ei_Cart_rad { ZERO }, bi_Cart_rad { ZERO }; bool is_gca { false }; - //getInterpFlds(p, ei, bi); - // ToDo: Better way to call this - getInterpFlds2nd(p, ei, bi); + // getInterpFlds(p, ei, bi); + // ToDo: Better way to call this + // getInterpFlds2nd(p, ei, bi); + for (auto i { 0u }; i < 3u; ++i) { + ei[i] = ZERO; + bi[i] = ZERO; + } metric.template transform_xyz(xp_Cd, ei, ei_Cart); metric.template transform_xyz(xp_Cd, bi, bi_Cart); diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp new file mode 100644 index 000000000..97b5bde47 --- /dev/null +++ b/src/kernels/particle_shapes.hpp @@ -0,0 +1,107 @@ +/** + * @file kernels/particle_shapes.hpp + * @brief Functions to compute particle shapes at specific locations on the grid. 
+ * @implements: + * - order_2<> -> void + * @namespaces: + * - prtl_shape:: + */ + +#ifndef KERNELS_PARTICLE_SHAPES_HPP +#define KERNELS_PARTICLE_SHAPES_HPP + +#include "global.h" + +#include "utils/error.h" +#include "utils/numeric.h" + +namespace prtl_shape { + + template + Inline void order_2nd(const int& i, + const real_t& di, + int& i_min, + real_t& S0, + real_t& S1, + real_t& S2) { + if constexpr (not STAGGERED) { // compute at i positions + if (di < HALF) { + i_min = i - 1; + S0 = HALF * SQR(HALF - di); + S1 = THREE_FOURTHS - SQR(di); + S2 = ONE - S0 - S1; + } else { + i_min = i; + S0 = HALF * SQR(THREE_FOURTHS - di); + S2 = HALF * SQR(di - HALF); + S1 = ONE - S0 - S2; + } + } else { // compute at i + 1/2 positions + i_min = i - 1; + S1 = HALF + di - SQR(di); + S2 = HALF * SQR(di); + S0 = ONE - S1 - S2; + } + } + + template + Inline void for_deposit_2nd(const int& i_init, + const real_t& di_init, + const int& i_fin, + const real_t& di_fin, + int& i_min, + real_t& iS_0, + real_t& iS_1, + real_t& iS_2, + real_t& iS_3, + real_t& fS_0, + real_t& fS_1, + real_t& fS_2, + real_t& fS_3) { + int i_init_min, i_fin_min; + + real_t iS_0_, iS_1_, iS_2_; + real_t fS_0_, fS_1_, fS_2_; + + order_2nd(i_init, di_init, i_init_min, iS_0_, iS_1_, iS_2_); + order_2nd(i_fin, di_fin, i_fin_min, fS_0_, fS_1_, fS_2_); + + if (i_init_min < i_fin_min) { + i_min = i_init_min; + iS_0 = iS_0_; + iS_1 = iS_1_; + iS_2 = iS_2_; + iS_3 = ZERO; + + fS_0 = ZERO; + fS_1 = iS_0_; + fS_2 = iS_1_; + fS_3 = iS_2_; + } else if (i_init_min > i_fin_min) { + i_min = i_fin_min; + iS_0 = ZERO; + iS_1 = iS_0_; + iS_2 = iS_1_; + iS_3 = iS_2_; + + fS_0 = iS_0_; + fS_1 = iS_1_; + fS_2 = iS_2_; + fS_3 = ZERO; + } else { + i_min = i_init_min; + iS_0 = iS_0_; + iS_1 = iS_1_; + iS_2 = iS_2_; + iS_3 = ZERO; + + fS_0 = iS_0_; + fS_1 = iS_1_; + fS_2 = iS_2_; + fS_3 = ZERO; + } + } + +} // namespace prtl_shape + +#endif // KERNELS_PARTICLE_SHAPES_HPP From 5aa4814efef205273659f2b33080f84b8a2885b1 Mon Sep 17 00:00:00 
2001 From: haykh Date: Tue, 5 Aug 2025 16:38:54 -0400 Subject: [PATCH 050/154] shape func fix --- src/kernels/particle_shapes.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 97b5bde47..bff8853ae 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -32,7 +32,7 @@ namespace prtl_shape { S2 = ONE - S0 - S1; } else { i_min = i; - S0 = HALF * SQR(THREE_FOURTHS - di); + S0 = HALF * SQR(static_cast(1.5) - di); S2 = HALF * SQR(di - HALF); S1 = ONE - S0 - S2; } From 7099a8813d0c85ab8ca8be7e893f2f16420fedbb Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Tue, 5 Aug 2025 23:40:17 -0500 Subject: [PATCH 051/154] generalized shape function to 5th order, cleanup and removal of staggered grid call --- src/global/utils/numeric.h | 2 + src/kernels/currents_deposit.hpp | 714 ++++++++----------------------- src/kernels/particle_shapes.hpp | 275 ++++++++++-- 3 files changed, 400 insertions(+), 591 deletions(-) diff --git a/src/global/utils/numeric.h b/src/global/utils/numeric.h index fd1ddc657..63f23d3e2 100644 --- a/src/global/utils/numeric.h +++ b/src/global/utils/numeric.h @@ -36,6 +36,7 @@ inline constexpr float TWO = 2.0f; inline constexpr float THREE = 3.0f; inline constexpr float FOUR = 4.0f; inline constexpr float FIVE = 5.0f; +inline constexpr float SIX = 6.0f; inline constexpr float TWELVE = 12.0f; inline constexpr float ZERO = 0.0f; inline constexpr float HALF = 0.5f; @@ -53,6 +54,7 @@ inline constexpr double TWO = 2.0; inline constexpr double THREE = 3.0; inline constexpr double FOUR = 4.0; inline constexpr double FIVE = 5.0; +inline constexpr double SIX = 6.0; inline constexpr double TWELVE = 12.0; inline constexpr double ZERO = 0.0; inline constexpr double HALF = 0.5; diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 1feb7ba4e..492dec5c1 100644 --- a/src/kernels/currents_deposit.hpp +++ 
b/src/kernels/currents_deposit.hpp @@ -295,395 +295,6 @@ namespace kernel { i_min += N_GHOSTS; } - Inline void W(real_t* _S, real_t x) const { - - if constexpr (O == 2) { - - _S[0] = HALF * SQR(HALF - x); - _S[1] = THREE_FOURTHS - SQR(x); - _S[2] = HALF * SQR(HALF + x); - - } else if constexpr (O == 3) { - - const auto x2 = x * x; - const auto x3 = x2 * x; - - _S[0] = static_cast(1 / 6) * (ONE - x3) - HALF * SQR(x - x2); - _S[1] = static_cast(2 / 3) - x2 + HALF * x3; - _S[2] = static_cast(1 / 6) + HALF * (x + x2 + x3); - _S[3] = static_cast(1 / 6) * x3; - - } else if constexpr (O == 4) { - - const auto x2 = x * x; - const auto x3 = x2 * x; - const auto x4 = x2 * x2; - - _S[0] = static_cast(1 / 384) - static_cast(1 / 48) * x + - static_cast(1 / 16) * x2 - - static_cast(1 / 12) * x3 + - static_cast(1 / 24) * x4; - _S[1] = static_cast(19 / 96) - static_cast(11 / 24) * x + - static_cast(1 / 4) * x2 + - static_cast(1 / 6) * x3 - static_cast(1 / 6) * x4; - _S[2] = static_cast(115 / 192) - static_cast(5 / 8) * x2 + - static_cast(1 / 4) * x4; - _S[3] = static_cast(19 / 96) + static_cast(11 / 24) * x + - static_cast(1 / 4) * x2 - - static_cast(1 / 6) * x3 - static_cast(1 / 6) * x4; - _S[4] = static_cast(1 / 384) + static_cast(1 / 48) * x + - static_cast(1 / 16) * x2 + - static_cast(1 / 12) * x3 + - static_cast(1 / 24) * x4; - - } else if constexpr (O == 5) { - - const auto x2 = x * x; - const auto x3 = x2 * x; - const auto x4 = x2 * x2; - const auto x5 = x3 * x2; - const auto x6 = x3 * x3; - - _S[0] = static_cast(1.0 / 46080.0) - - static_cast(1.0 / 3840.0) * x + - static_cast(1.0 / 384.0) * x2 - - static_cast(1.0 / 96.0) * x3 + - static_cast(1.0 / 72.0) * x4 - - static_cast(1.0 / 144.0) * x5 + - static_cast(1.0 / 720.0) * x6; - - _S[1] = static_cast(13.0 / 9216.0) - - static_cast(11.0 / 768.0) * x + - static_cast(1.0 / 48.0) * x2 + - static_cast(5.0 / 72.0) * x3 - - static_cast(1.0 / 8.0) * x4 + - static_cast(5.0 / 144.0) * x5 - - static_cast(1.0 / 144.0) * x6; - - _S[2] = 
static_cast(115.0 / 768.0) - - static_cast(5.0 / 24.0) * x2 + - static_cast(1.0 / 8.0) * x4 - - static_cast(1.0 / 72.0) * x6; - - _S[3] = static_cast(115.0 / 768.0) - - static_cast(5.0 / 24.0) * x2 + - static_cast(1.0 / 8.0) * x4 - - static_cast(1.0 / 72.0) * x6; - - _S[4] = static_cast(13.0 / 9216.0) + - static_cast(11.0 / 768.0) * x + - static_cast(1.0 / 48.0) * x2 - - static_cast(5.0 / 72.0) * x3 - - static_cast(1.0 / 8.0) * x4 - - static_cast(5.0 / 144.0) * x5 - - static_cast(1.0 / 144.0) * x6; - - _S[5] = static_cast(1.0 / 46080.0) + - static_cast(1.0 / 3840.0) * x + - static_cast(1.0 / 384.0) * x2 + - static_cast(1.0 / 96.0) * x3 + - static_cast(1.0 / 72.0) * x4 + - static_cast(1.0 / 144.0) * x5 + - static_cast(1.0 / 720.0) * x6; - - } else if constexpr (O == 6) { - - const auto x2 = x * x; - const auto x3 = x2 * x; - const auto x4 = x2 * x2; - const auto x5 = x3 * x2; - const auto x6 = x3 * x3; - - _S[0] = static_cast(1.0 / 40320.0) - - static_cast(1.0 / 4480.0) * x + - static_cast(1.0 / 640.0) * x2 - - static_cast(1.0 / 192.0) * x3 + - static_cast(1.0 / 144.0) * x4 - - static_cast(1.0 / 288.0) * x5 + - static_cast(1.0 / 1440.0) * x6; - - _S[1] = static_cast(1.0 / 1344.0) - - static_cast(1.0 / 160.0) * x + - static_cast(5.0 / 192.0) * x2 - - static_cast(1.0 / 48.0) * x3 - - static_cast(1.0 / 48.0) * x4 + - static_cast(5.0 / 288.0) * x5 - - static_cast(1.0 / 288.0) * x6; - - _S[2] = static_cast(17.0 / 336.0) - - static_cast(5.0 / 48.0) * x2 + - static_cast(1.0 / 12.0) * x4 - - static_cast(1.0 / 144.0) * x6; - - _S[3] = static_cast(151.0 / 252.0) - - static_cast(35.0 / 48.0) * x2 + - static_cast(5.0 / 12.0) * x4 - - static_cast(1.0 / 36.0) * x6; - - _S[4] = static_cast(17.0 / 336.0) - - static_cast(5.0 / 48.0) * x2 + - static_cast(1.0 / 12.0) * x4 - - static_cast(1.0 / 144.0) * x6; - - _S[5] = static_cast(1.0 / 1344.0) + - static_cast(1.0 / 160.0) * x + - static_cast(5.0 / 192.0) * x2 + - static_cast(1.0 / 48.0) * x3 - - static_cast(1.0 / 48.0) * x4 - - 
static_cast(5.0 / 288.0) * x5 - - static_cast(1.0 / 288.0) * x6; - - _S[6] = static_cast(1.0 / 40320.0) + - static_cast(1.0 / 4480.0) * x + - static_cast(1.0 / 640.0) * x2 + - static_cast(1.0 / 192.0) * x3 + - static_cast(1.0 / 144.0) * x4 + - static_cast(1.0 / 288.0) * x5 + - static_cast(1.0 / 1440.0) * x6; - - } else if constexpr (O == 7) { - - const auto x2 = x * x; - const auto x3 = x2 * x; - const auto x4 = x2 * x2; - const auto x5 = x3 * x2; - const auto x6 = x3 * x3; - const auto x7 = x4 * x3; - - _S[0] = static_cast(1.0 / 645120.0) - - static_cast(1.0 / 64512.0) * x + - static_cast(1.0 / 9216.0) * x2 - - static_cast(1.0 / 3072.0) * x3 + - static_cast(1.0 / 2304.0) * x4 - - static_cast(1.0 / 4608.0) * x5 + - static_cast(1.0 / 23040.0) * x6 - - static_cast(1.0 / 161280.0) * x7; - - _S[1] = static_cast(1.0 / 9216.0) - - static_cast(5.0 / 4608.0) * x + - static_cast(35.0 / 9216.0) * x2 - - static_cast(7.0 / 768.0) * x3 - - static_cast(7.0 / 1152.0) * x4 + - static_cast(35.0 / 4608.0) * x5 - - static_cast(5.0 / 4608.0) * x6 + - static_cast(1.0 / 9216.0) * x7; - - _S[2] = static_cast(25.0 / 1536.0) - - static_cast(35.0 / 768.0) * x2 + - static_cast(7.0 / 192.0) * x4 - - static_cast(1.0 / 96.0) * x6; - - _S[3] = static_cast(245.0 / 384.0) - - static_cast(245.0 / 192.0) * x2 + - static_cast(49.0 / 48.0) * x4 - - static_cast(7.0 / 72.0) * x6; - - _S[4] = _S[3]; // symmetry - - _S[5] = _S[2]; // symmetry - - _S[6] = static_cast(1 / 9216) + static_cast(5 / 4608) * x + - static_cast(35 / 9216) * x2 + - static_cast(7 / 768) * x3 - - static_cast(7 / 1152) * x4 - - static_cast(35 / 4608) * x5 - - static_cast(5 / 4608) * x6 - - static_cast(1 / 9216) * x7; - - _S[7] = static_cast(1 / 645120) + - static_cast(1 / 64512) * x + - static_cast(1 / 9216) * x2 + - static_cast(1 / 3072) * x3 + - static_cast(1 / 2304) * x4 + - static_cast(1 / 4608) * x5 + - static_cast(1 / 23040) * x6 + - static_cast(1 / 161280) * x7; - - } else if constexpr (O == 8) { - - const auto x2 = x * x; - 
const auto x3 = x2 * x; - const auto x4 = x2 * x2; - const auto x5 = x3 * x2; - const auto x6 = x3 * x3; - const auto x7 = x4 * x3; - const auto x8 = x4 * x4; - - _S[0] = static_cast(1.0 / 10321920.0) - - static_cast(1.0 / 1146880.0) * x + - static_cast(1.0 / 161280.0) * x2 - - static_cast(1.0 / 53760.0) * x3 + - static_cast(1.0 / 43008.0) * x4 - - static_cast(1.0 / 96768.0) * x5 + - static_cast(1.0 / 645120.0) * x6 - - static_cast(1.0 / 1032192.0) * x7 + - static_cast(1.0 / 4134528.0) * x8; - - _S[1] = static_cast(1.0 / 129024.0) - - static_cast(1.0 / 14336.0) * x + - static_cast(17.0 / 43008.0) * x2 - - static_cast(17.0 / 21504.0) * x3 + - static_cast(17.0 / 21504.0) * x4 - - static_cast(17.0 / 43008.0) * x5 + - static_cast(1.0 / 14336.0) * x6 - - static_cast(1.0 / 129024.0) * x7 + - static_cast(1.0 / 1032192.0) * x8; - - _S[2] = static_cast(361.0 / 64512.0) - - static_cast(153.0 / 14336.0) * x2 + - static_cast(51.0 / 14336.0) * x4 - - static_cast(17.0 / 43008.0) * x6 + - static_cast(1.0 / 1032192.0) * x8; - - _S[3] = static_cast(3061.0 / 16128.0) - - static_cast(170.0 / 1792.0) * x2 + - static_cast(34.0 / 1536.0) * x4 - - static_cast(17.0 / 16128.0) * x6; - - _S[4] = static_cast(257135.0 / 32256.0) - - static_cast(1785.0 / 896.0) * x2 + - static_cast(255.0 / 256.0) * x4 - - static_cast(85.0 / 1152.0) * x6; - - _S[5] = _S[3]; // symmetry - - _S[6] = _S[2]; // symmetry - - _S[7] = static_cast(1 / 129024) + - static_cast(1 / 14336) * x + - static_cast(17 / 43008) * x2 + - static_cast(17 / 21504) * x3 + - static_cast(17 / 21504) * x4 + - static_cast(17 / 43008) * x5 + - static_cast(1 / 14336) * x6 + - static_cast(1 / 129024) * x7 + - static_cast(1 / 1032192) * x8; - - _S[8] = static_cast(1 / 10321920) + - static_cast(1 / 1146880) * x + - static_cast(1 / 161280) * x2 + - static_cast(1 / 53760) * x3 + - static_cast(1 / 43008) * x4 + - static_cast(1 / 96768) * x5 + - static_cast(1 / 645120) * x6 + - static_cast(1 / 1032192) * x7 + - static_cast(1 / 4134528) * x8; - - } 
else { - raise::KernelError(HERE, "Invalid order of shape function!"); - } - } - - Inline void shape_function_Nth(real_t* S0, - real_t* S1, - ncells_t& i_min, - const index_t& i, - const real_t& di, - const index_t& i_prev, - const real_t& di_prev) const { - /* - Shape function per particle is a O+1 element array. - We need to find which indices are contributing to the shape function - For this we first compute the indices of the particle position - - Let * be the particle position at the current timestep - Let x be the particle position at the previous timestep - - - (-1) 0 1 ... N N+1 - __________________________________________ - | | x* | x* | // | x* | | // shift_i = 0 - |______|______|______|______|______|______| - | | x | x* | // | x* | * | // shift_i = 1 - |______|______|______|______|______|______| - | * | x* | x* | // | x | | // shift_i = -1 - |______|______|______|______|______|______| - */ - - // find shift in indices - // ToDo: fix - const int di_less_half = static_cast(di < static_cast(0.5)); - const int di_prev_less_half = static_cast( - di_prev < static_cast(0.5)); - - const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); - - // find the minimum index of the shape function -> ToDo! - i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); - - // center index of the shape function -> ToDo! - const auto di_center_prev = static_cast(1 - di_prev_less_half) - - di_prev; - const auto di_center = static_cast(1 - di_less_half) - di; - // ToDo: end fix - - real_t _S0[O + 1], _S1[O + 1]; - // apply shape function - W(_S0, di_center_prev); - W(_S1, di_center); - - // find indices and define shape function - if (shift_i == 1) { - /* - (-1) 0 1 ... 
N N+1 - __________________________________________ - | | x | x* | // | x* | * | // shift_i = 1 - |______|______|______|______|______|______| - */ - - for (int j = 0; j < O; j++) { - S0[j] = _S0[j]; - } - S0[O + 1] = ZERO; - - S1[0] = ZERO; - for (int j = 0; j < O; j++) { - S1[j + 1] = _S1[j]; - } - - } else if (shift_i == -1) { - /* - (-1) 0 1 ... N N+1 - __________________________________________ - | * | x* | x* | // | x | | // shift_i = -1 - |______|______|______|______|______|______| - */ - - S0[0] = ZERO; - for (int j = 0; j < O; j++) { - S0[j + 1] = _S0[j]; - } - - for (int j = 0; j < O; j++) { - S1[j] = _S1[j]; - } - S1[O + 1] = ZERO; - - } else if (shift_i == 0) { - /* - (-1) 0 1 ... N N+1 - __________________________________________ - | | x* | x* | // | x* | | // shift_i = 0 - |______|______|______|______|______|______| - */ - - for (int j = 0; j < O; j++) { - S0[j] = _S0[j]; - } - S0[O + 1] = ZERO; - - for (int j = 0; j < O; j++) { - S1[j] = _S1[j]; - } - S1[O + 1] = ZERO; - } else { - raise::KernelError(HERE, "Invalid shift in indices"); - } - - // account for ghost cells here to shorten J update expression - i_min += N_GHOSTS; - } - public: /** * @brief explicit constructor. 
@@ -1036,96 +647,76 @@ namespace kernel { // iS -> shape function for init position // fS -> shape function for final position - // shape function at staggered points (one coeff is always ZERO) - int i1_minH; - real_t iS_x1H_0, iS_x1H_1, iS_x1H_2, iS_x1H_3; - real_t fS_x1H_0, fS_x1H_1, fS_x1H_2, fS_x1H_3; - // shape function at integer points (one coeff is always ZERO) int i1_min; real_t iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3; real_t fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3; // clang-format off - prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), - i1(p), static_cast(dx1(p)), - i1_minH, - iS_x1H_0, iS_x1H_1, iS_x1H_2, iS_x1H_3, - fS_x1H_0, fS_x1H_1, fS_x1H_2, fS_x1H_3); - prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), - i1(p), static_cast(dx1(p)), - i1_min, - iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, - fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); + prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), + i1(p), static_cast(dx1(p)), + i1_min, + iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, + fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); // clang-format on if constexpr (D == Dim::_1D) { raise::KernelNotImplementedError(HERE); } else if constexpr (D == Dim::_2D) { - // shape function at staggered points (one coeff is always ZERO) - int i2_minH; - real_t iS_x2H_0, iS_x2H_1, iS_x2H_2, iS_x2H_3; - real_t fS_x2H_0, fS_x2H_1, fS_x2H_2, fS_x2H_3; - // shape function at integer points (one coeff is always ZERO) int i2_min; real_t iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3; real_t fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3; // clang-format off - prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), - i2(p), static_cast(dx2(p)), - i2_minH, - iS_x2H_0, iS_x2H_1, iS_x2H_2, iS_x2H_3, - fS_x2H_0, fS_x2H_1, fS_x2H_2, fS_x2H_3); - prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), - i2(p), static_cast(dx2(p)), - i2_min, - iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, - fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); + prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), + 
i2(p), static_cast(dx2(p)), + i2_min, + iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, + fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); // clang-format on // x1-components - const auto Wx1_00 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_0 + iS_x2_0); - const auto Wx1_01 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_1 + iS_x2_1); - const auto Wx1_02 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_2 + iS_x2_2); - const auto Wx1_03 = HALF * (fS_x1H_0 - iS_x1H_0) * (fS_x2_3 + iS_x2_3); - - const auto Wx1_10 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_0 + iS_x2_0); - const auto Wx1_11 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_1 + iS_x2_1); - const auto Wx1_12 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_2 + iS_x2_2); - const auto Wx1_13 = HALF * (fS_x1H_1 - iS_x1H_1) * (fS_x2_3 + iS_x2_3); - - const auto Wx1_20 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_0 + iS_x2_0); - const auto Wx1_21 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_1 + iS_x2_1); - const auto Wx1_22 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_2 + iS_x2_2); - const auto Wx1_23 = HALF * (fS_x1H_2 - iS_x1H_2) * (fS_x2_3 + iS_x2_3); - - const auto Wx1_30 = HALF * (fS_x1H_3 - iS_x1H_3) * (fS_x2_0 + iS_x2_0); - const auto Wx1_31 = HALF * (fS_x1H_3 - iS_x1H_3) * (fS_x2_1 + iS_x2_1); - const auto Wx1_32 = HALF * (fS_x1H_3 - iS_x1H_3) * (fS_x2_2 + iS_x2_2); - const auto Wx1_33 = HALF * (fS_x1H_3 - iS_x1H_3) * (fS_x2_3 + iS_x2_3); + const auto Wx1_00 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_0 + iS_x2_0); + const auto Wx1_01 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_1 + iS_x2_1); + const auto Wx1_02 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_2 + iS_x2_2); + const auto Wx1_03 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_3 + iS_x2_3); + + const auto Wx1_10 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_0 + iS_x2_0); + const auto Wx1_11 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_1 + iS_x2_1); + const auto Wx1_12 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_2 + iS_x2_2); + const auto Wx1_13 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_3 + iS_x2_3); + + const auto Wx1_20 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_0 + iS_x2_0); + 
const auto Wx1_21 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_1 + iS_x2_1); + const auto Wx1_22 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_2 + iS_x2_2); + const auto Wx1_23 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_3 + iS_x2_3); + + const auto Wx1_30 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_0 + iS_x2_0); + const auto Wx1_31 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_1 + iS_x2_1); + const auto Wx1_32 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_2 + iS_x2_2); + const auto Wx1_33 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_3 + iS_x2_3); // x2-components - const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_0 - iS_x2H_0); - const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_1 - iS_x2H_1); - const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_2 - iS_x2H_2); - const auto Wx2_03 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2H_3 - iS_x2H_3); - - const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_0 - iS_x2H_0); - const auto Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_1 - iS_x2H_1); - const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_2 - iS_x2H_2); - const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2H_3 - iS_x2H_3); - - const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_0 - iS_x2H_0); - const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_1 - iS_x2H_1); - const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_2 - iS_x2H_2); - const auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2H_3 - iS_x2H_3); - - const auto Wx2_30 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_0 - iS_x2H_0); - const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_1 - iS_x2H_1); - const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_2 - iS_x2H_2); - const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2H_3 - iS_x2H_3); + const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_0 - iS_x2_0); + const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_1 - iS_x2_1); + const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_2 - iS_x2_2); + const auto Wx2_03 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_3 - iS_x2_3); 
+ + const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_0 - iS_x2_0); + const auto Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_1 - iS_x2_1); + const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_2 - iS_x2_2); + const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_3 - iS_x2_3); + + const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_0 - iS_x2_0); + const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_1 - iS_x2_1); + const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_2 - iS_x2_2); + const auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_3 - iS_x2_3); + + const auto Wx2_30 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_0 - iS_x2_0); + const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_1 - iS_x2_1); + const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_2 - iS_x2_2); + const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_3 - iS_x2_3); // x3-components const auto Wx3_00 = THIRD * (fS_x2_0 * (HALF * iS_x1_0 + fS_x1_0) + @@ -1206,9 +797,7 @@ namespace kernel { const auto jx2_32 = jx2_31 + Wx2_32; const auto jx2_33 = jx2_32 + Wx2_33; - i1_minH += N_GHOSTS; i1_min += N_GHOSTS; - i2_minH += N_GHOSTS; i2_min += N_GHOSTS; // @TODO: not sure about the signs here @@ -1219,46 +808,46 @@ namespace kernel { auto J_acc = J.access(); // x1-currents - J_acc(i1_minH + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; - J_acc(i1_minH + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; - J_acc(i1_minH + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; - J_acc(i1_minH + 0, i2_min + 3, cur::jx1) += Qdx1dt * jx1_03; - - J_acc(i1_minH + 1, i2_min + 0, cur::jx1) += Qdx1dt * jx1_10; - J_acc(i1_minH + 1, i2_min + 1, cur::jx1) += Qdx1dt * jx1_11; - J_acc(i1_minH + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; - J_acc(i1_minH + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; - - J_acc(i1_minH + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; - J_acc(i1_minH + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; - J_acc(i1_minH + 2, i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; - J_acc(i1_minH + 2, 
i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; - - J_acc(i1_minH + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; - J_acc(i1_minH + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; - J_acc(i1_minH + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; - J_acc(i1_minH + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; + J_acc(i1_min + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; + J_acc(i1_min + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; + J_acc(i1_min + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; + J_acc(i1_min + 0, i2_min + 3, cur::jx1) += Qdx1dt * jx1_03; + + J_acc(i1_min + 1, i2_min + 0, cur::jx1) += Qdx1dt * jx1_10; + J_acc(i1_min + 1, i2_min + 1, cur::jx1) += Qdx1dt * jx1_11; + J_acc(i1_min + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; + J_acc(i1_min + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; + + J_acc(i1_min + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; + J_acc(i1_min + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; + J_acc(i1_min + 2, i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; + J_acc(i1_min + 2, i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; + + J_acc(i1_min + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; + J_acc(i1_min + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; + J_acc(i1_min + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; + J_acc(i1_min + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; // x2-currents - J_acc(i1_min + 0, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_00; - J_acc(i1_min + 0, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_01; - J_acc(i1_min + 0, i2_minH + 2, cur::jx2) += Qdx2dt * jx2_02; - J_acc(i1_min + 0, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_03; - - J_acc(i1_min + 1, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_10; - J_acc(i1_min + 1, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_11; - J_acc(i1_min + 1, i2_minH + 2, cur::jx2) += Qdx2dt * jx2_12; - J_acc(i1_min + 1, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_13; - - J_acc(i1_min + 2, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_20; - J_acc(i1_min + 2, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_21; - J_acc(i1_min + 2, i2_minH + 2, cur::jx2) += Qdx2dt * 
jx2_22; - J_acc(i1_min + 2, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_23; - - J_acc(i1_min + 3, i2_minH + 0, cur::jx2) += Qdx2dt * jx2_30; - J_acc(i1_min + 3, i2_minH + 1, cur::jx2) += Qdx2dt * jx2_31; - J_acc(i1_min + 3, i2_minH + 2, cur::jx2) += Qdx2dt * jx2_32; - J_acc(i1_min + 3, i2_minH + 3, cur::jx2) += Qdx2dt * jx2_33; + J_acc(i1_min + 0, i2_min + 0, cur::jx2) += Qdx2dt * jx2_00; + J_acc(i1_min + 0, i2_min + 1, cur::jx2) += Qdx2dt * jx2_01; + J_acc(i1_min + 0, i2_min + 2, cur::jx2) += Qdx2dt * jx2_02; + J_acc(i1_min + 0, i2_min + 3, cur::jx2) += Qdx2dt * jx2_03; + + J_acc(i1_min + 1, i2_min + 0, cur::jx2) += Qdx2dt * jx2_10; + J_acc(i1_min + 1, i2_min + 1, cur::jx2) += Qdx2dt * jx2_11; + J_acc(i1_min + 1, i2_min + 2, cur::jx2) += Qdx2dt * jx2_12; + J_acc(i1_min + 1, i2_min + 3, cur::jx2) += Qdx2dt * jx2_13; + + J_acc(i1_min + 2, i2_min + 0, cur::jx2) += Qdx2dt * jx2_20; + J_acc(i1_min + 2, i2_min + 1, cur::jx2) += Qdx2dt * jx2_21; + J_acc(i1_min + 2, i2_min + 2, cur::jx2) += Qdx2dt * jx2_22; + J_acc(i1_min + 2, i2_min + 3, cur::jx2) += Qdx2dt * jx2_23; + + J_acc(i1_min + 3, i2_min + 0, cur::jx2) += Qdx2dt * jx2_30; + J_acc(i1_min + 3, i2_min + 1, cur::jx2) += Qdx2dt * jx2_31; + J_acc(i1_min + 3, i2_min + 2, cur::jx2) += Qdx2dt * jx2_32; + J_acc(i1_min + 3, i2_min + 3, cur::jx2) += Qdx2dt * jx2_33; // x3-currents J_acc(i1_min + 0, i2_min + 0, cur::jx3) += QVx3 * Wx3_00; @@ -1280,6 +869,7 @@ namespace kernel { J_acc(i1_min + 3, i2_min + 1, cur::jx3) += QVx3 * Wx3_31; J_acc(i1_min + 3, i2_min + 2, cur::jx3) += QVx3 * Wx3_32; J_acc(i1_min + 3, i2_min + 3, cur::jx3) += QVx3 * Wx3_33; + } else if constexpr (D == Dim::_3D) { // /* // y - direction @@ -2549,30 +2139,44 @@ namespace kernel { } // dim -> ToDo: 3D! 
- } else if constexpr (O > 3u) { + } else if constexpr ((O > 3u) && (O < 5u)) { // shape function in dim1 -> always required - real_t S0x[O + 2], S1x[O + 2]; + real_t iS_x1[O + 2], fS_x1[O + 2]; // indices of the shape function - ncells_t ix_min; + ncells_t i1_min; - // ToDo: Call shape function + // call shape function + prtl_shape::for_deposit(i1_prev(p), + static_cast(dx1_prev(p)), + i1(p), + static_cast(dx1(p)), + i1_min, + iS_x1, + fS_x1); if constexpr (D == Dim::_1D) { // ToDo } else if constexpr (D == Dim::_2D) { - // shape function in dim2 - real_t S0y[O + 2], S1y[O + 2]; + // shape function in dim1 -> always required + real_t iS_x2[O + 2], fS_x2[O + 2]; // indices of the shape function - ncells_t iy_min; + ncells_t i2_min; - // ToDo: Call shape function + // call shape function + prtl_shape::for_deposit(i2_prev(p), + static_cast(dx2_prev(p)), + i2(p), + static_cast(dx2(p)), + i2_min, + iS_x2, + fS_x2); // define weight tensors - real_t Wx[O + 1][O + 1]; - real_t Wy[O + 1][O + 1]; - real_t Wz[O + 1][O + 1]; + real_t Wx[O + 2][O + 2]; + real_t Wy[O + 2][O + 2]; + real_t Wz[O + 2][O + 2]; // Calculate weight function #pragma unroll @@ -2580,51 +2184,54 @@ namespace kernel { #pragma unroll for (int j = 0; j < O + 2; ++j) { // Esirkepov 2001, Eq. 
38 - Wx[i][j] = (S1x[i] - S0x[i]) * (S0y[j] + HALF * (S1y[j] - S0y[j])); + Wx[i][j] = (fS_x1[i] - iS_x1[i]) * + (iS_x2[j] + HALF * (fS_x2[j] - iS_x2[j])); - Wy[i][j] = (S1y[i] - S0y[i]) * (S0y[j] + HALF * (S1x[j] - S0x[j])); + Wy[i][j] = (fS_x2[j] - iS_x2[j]) * + (iS_x2[j] + HALF * (fS_x1[i] - iS_x1[i])); - Wz[i][j] = S0x[i] * S0y[j] + HALF * (S1x[i] - S1x[i]) * S0y[j] + - HALF * S0x[i] * (S1y[j] - S0y[j]) + - THIRD * (S1x[i] - S0x[i]) * (S1y[j] - S0y[j]); + Wz[i][j] = iS_x1[i] * iS_x2[j] + + HALF * (fS_x1[i] - fS_x1[i]) * iS_x2[j] + + HALF * iS_x1[i] * (fS_x2[j] - iS_x2[j]) + + THIRD * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] - iS_x2[j]); } } // contribution within the shape function stencil real_t jx[O + 2][O + 2], jy[O + 2][O + 2], jz[O + 2][O + 2]; - // prefactors to j update - const real_t Qdxdt = coeff * inv_dt; - const real_t Qdydt = coeff * inv_dt; - const real_t QVz = coeff * inv_dt * vp[2]; + // prefactors for j update + const real_t Qdx1dt = -coeff * inv_dt; + const real_t Qdx2dt = -coeff * inv_dt; + const real_t QVx3 = coeff * vp[2]; // Calculate current contribution // jx #pragma unroll for (int j = 0; j < O + 2; ++j) { - jx[0][j] = -Qdxdt * Wx[0][j]; + jx[0][j] = Wx[0][j]; } #pragma unroll for (int i = 1; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - jx[i][j] = jx[i - 1][j] - Qdxdt * Wx[i][j]; + jx[i][j] = jx[i - 1][j] + Wx[i][j]; } } // jy #pragma unroll for (int i = 0; i < O + 2; ++i) { - jy[i][0] = -Qdydt * Wy[i][0]; + jy[i][0] = Wy[i][0]; } #pragma unroll for (int j = 1; j < O + 2; ++j) { #pragma unroll for (int i = 0; i < O + 2; ++i) { - jy[i][j] = jy[i][j - 1] - Qdydt * Wy[i][j]; + jy[i][j] = jy[i][j - 1] + Wy[i][j]; } } @@ -2633,39 +2240,56 @@ namespace kernel { for (int i = 0; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - jz[i][j] = QVz * Wz[i][j]; + jz[i][j] = Wz[i][j]; } } + // account for ghost cells + i1_min += N_GHOSTS; + i2_min += N_GHOSTS; + /* Current update - */ + */ auto J_acc = J.access(); 
#pragma unroll for (int i = 0; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - J_acc(ix_min + i, iy_min + j, cur::jx1) += jx[i][j]; - J_acc(ix_min + i, iy_min + j, cur::jx2) += jy[i][j]; - J_acc(ix_min + i, iy_min + j, cur::jx3) += jz[i][j]; + J_acc(i1_min + i, i2_min + j, cur::jx1) += Qdx1dt * jx[i][j]; + J_acc(i1_min + i, i2_min + j, cur::jx2) += Qdx2dt * jy[i][j]; + J_acc(i1_min + i, i2_min + j, cur::jx3) += QVx3 * jz[i][j]; } } } else if constexpr (D == Dim::_3D) { // shape function in dim2 - real_t S0y[O + 2], S1y[O + 2]; + real_t iS_x2[O + 2], fS_x2[O + 2]; // indices of the shape function - ncells_t iy_min; - - // ToDo: Call shape function + ncells_t i2_min; + // call shape function + prtl_shape::for_deposit(i2_prev(p), + static_cast(dx2_prev(p)), + i2(p), + static_cast(dx2(p)), + i2_min, + iS_x2, + fS_x2); // shape function in dim3 - real_t S0z[O + 2], S1z[O + 2]; + real_t iS_x3[O + 2], fS_x3[O + 2]; // indices of the shape function - ncells_t iz_min; + ncells_t i3_min; - // ToDo: Call shape function + // call shape function + prtl_shape::for_deposit(i3_prev(p), + static_cast(dx3_prev(p)), + i3(p), + static_cast(dx3(p)), + i3_min, + iS_x3, + fS_x3); // define weight tensors real_t Wx[O + 1][O + 1][O + 1]; @@ -2680,17 +2304,17 @@ namespace kernel { #pragma unroll for (int k = 0; k < O + 2; ++k) { // Esirkepov 2001, Eq. 
31 - Wx[i][j][k] = THIRD * (S1x[i] - S0x[i]) * - ((S0y[j] * S0z[k] + S1y[j] * S1z[k]) + - HALF * (S0z[k] * S1y[j] + S0y[j] * S1z[k])); + Wx[i][j][k] = THIRD * (fS_x1[i] - iS_x1[i]) * + ((iS_x2[j] * iS_x3[k] + fS_x2[j] * fS_x3[k]) + + HALF * (iS_x3[k] * fS_x2[j] + iS_x2[j] * fS_x3[k])); - Wy[i][j][k] = THIRD * (S1y[j] - S0y[j]) * - (S0x[i] * S0z[k] + S1x[i] * S1z[k] + - HALF * (S0z[k] * S1x[i] + S0x[i] * S1z[k])); + Wy[i][j][k] = THIRD * (fS_x2[j] - iS_x2[j]) * + (iS_x1[i] * iS_x3[k] + fS_x1[i] * fS_x3[k] + + HALF * (iS_x3[k] * fS_x1[i] + iS_x1[i] * fS_x3[k])); - Wz[i][j][k] = THIRD * (S1z[k] - S0z[k]) * - (S0x[i] * S0y[j] + S1x[i] * S1y[j] + - HALF * (S0x[i] * S1y[j] + S0y[j] * S1x[i])); + Wz[i][j][k] = THIRD * (fS_x3[k] - iS_x3[k]) * + (iS_x1[i] * iS_x2[j] + fS_x1[i] * fS_x2[j] + + HALF * (iS_x1[i] * fS_x2[j] + iS_x2[j] * fS_x1[i])); } } } @@ -2777,9 +2401,9 @@ namespace kernel { for (int j = 0; j < O + 2; ++j) { #pragma unroll for (int k = 1; k < O + 2; ++k) { - J_acc(ix_min + i, iy_min + j, iz_min, cur::jx1) += jx[i][j][k]; - J_acc(ix_min + i, iy_min + j, iz_min, cur::jx2) += jy[i][j][k]; - J_acc(ix_min + i, iy_min + j, iz_min, cur::jx3) += jz[i][j][k]; + J_acc(i1_min + i, i2_min + j, i3_min, cur::jx1) += jx[i][j][k]; + J_acc(i1_min + i, i2_min + j, i3_min, cur::jx2) += jy[i][j][k]; + J_acc(i1_min + i, i2_min + j, i3_min, cur::jx3) += jz[i][j][k]; } } } diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index bff8853ae..7a0af8b75 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -17,34 +17,155 @@ namespace prtl_shape { - template - Inline void order_2nd(const int& i, - const real_t& di, - int& i_min, - real_t& S0, - real_t& S1, - real_t& S2) { - if constexpr (not STAGGERED) { // compute at i positions - if (di < HALF) { + template + Inline void order(const int& i, const real_t& di, int& i_min, real_t* S) { + if constexpr (O == 2u) { + if constexpr (not STAGGERED) { // compute at i positions + if (di 
< HALF) { + i_min = i - 1; + S[0] = HALF * SQR(HALF - di); + S[1] = THREE_FOURTHS - SQR(di); + S[2] = ONE - S[0] - S[1]; + } else { + i_min = i; + S[0] = HALF * SQR(static_cast(1.5) - di); + S[2] = HALF * SQR(di - HALF); + S[1] = ONE - S[0] - S[2]; + } + } else { // compute at i + 1/2 positions i_min = i - 1; - S0 = HALF * SQR(HALF - di); - S1 = THREE_FOURTHS - SQR(di); - S2 = ONE - S0 - S1; - } else { - i_min = i; - S0 = HALF * SQR(static_cast(1.5) - di); - S2 = HALF * SQR(di - HALF); - S1 = ONE - S0 - S2; - } - } else { // compute at i + 1/2 positions - i_min = i - 1; - S1 = HALF + di - SQR(di); - S2 = HALF * SQR(di); - S0 = ONE - S1 - S2; + S[1] = THREE_FOURTHS - SQR(di - HALF); + S[2] = HALF * SQR(di); + S[0] = ONE - S[1] - S[2]; + } // staggered + } else if constexpr (O == 3u) { + if constexpr (not STAGGERED) { // compute at i positions + i_min = i - 2; + S[0] = HALF * THIRD * CUBE(ONE - di); + S[3] = HALF * THIRD * CUBE(di); + S[1] = HALF * THIRD * (FOUR - SIX * SQR(di) + THREE * CUBE(di)); + S[2] = ONE - S[0] - S[1] - S[3]; + } else { // compute at i + 1/2 positions + if (di < HALF) { + i_min = i - 2; + S[0] = HALF * THIRD * CUBE(HALF - di); + S[3] = HALF * THIRD * CUBE(HALF + di); + S[1] = HALF * THIRD * + (FOUR - SIX * SQR(HALF - di) + THREE * CUBE(HALF - di)); + S[2] = ONE - S[0] - S[1] - S[3]; + } else { + i_min = i - 1; + S[0] = HALF * THIRD * CUBE(HALF + di); + S[3] = HALF * THIRD * CUBE(HALF + di); + S[1] = HALF * THIRD * + (FOUR - SIX * SQR(di - HALF) + THREE * CUBE(di - HALF)); + S[2] = ONE - S[0] - S[1] - S[3]; + } + } // staggered + } else if constexpr (O == 4u) { + // 1/25 * ( 5/2 - |x|)^4 |x| < 3/2 + // S(x) = 5/8 - |x|^2 + 32/45 * |x|^3 - 98/675 * |x|^4 3/2 ≤ |x| < 5/2 + // 0.0 |x| ≥ 5/2 + if constexpr (not STAGGERED) { // compute at i positions + if (di < HALF) { + i_min = i - 2; + S[0] = ONE / (FIVE * FIVE) * SQR(SQR(HALF - di)); + S[4] = ONE / (FIVE * FIVE) * SQR(SQR(HALF + di)); + S[1] = FIVE * INV_8 - SQR(ONE + di) + + static_cast(32 / 45) 
* CUBE(ONE + di) - + static_cast(98 / 675) * SQR(SQR(ONE + di)); + S[2] = FIVE * INV_8 - SQR(di) + static_cast(32 / 45) * CUBE(di) - + static_cast(98 / 675) * SQR(SQR(di)); + S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + } else { + i_min = i - 1; + S[0] = ONE / (FIVE * FIVE) * SQR(SQR(THREE * HALF - di)); + S[4] = ONE / (FIVE * FIVE) * SQR(SQR(di - HALF)); + S[1] = FIVE * INV_8 - SQR(di) + static_cast(32 / 45) * CUBE(di) - + static_cast(98 / 675) * SQR(SQR(di)); + S[2] = FIVE * INV_8 - SQR(ONE - di) + + static_cast(32 / 45) * CUBE(ONE - di) - + static_cast(98 / 675) * SQR(SQR(ONE - di)); + S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + } + } else { // compute at i + 1/2 positions + i_min = i - 2; + S[0] = ONE / (FIVE * FIVE) * SQR(SQR(ONE - di)); // + S[4] = ONE / (FIVE * FIVE) * SQR(SQR(di)); // + S[1] = FIVE * INV_8 - SQR(HALF + di) + + static_cast(32 / 45) * CUBE(HALF + di) - + static_cast(98 / 675) * SQR(SQR(HALF + di)); + S[2] = FIVE * INV_8 - SQR(HALF - di) + + static_cast(32 / 45) * CUBE(HALF - di) - + static_cast(98 / 675) * SQR(SQR(HALF - di)); + S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + } // staggered + } else if constexpr (O == 5u) { + // 3/5 - |x|^2 + 5/6 * |x|^3 - 19/72 * |x|^4 + 13/432 * |x|^5 |x| < 2 + // S(x) = 1/135 * (3 - |x|)^5 2 ≤ |x| < 3 + // 0.0 |x| ≥ 3 + if constexpr (not STAGGERED) { // compute at i positions + i_min = i - 2; + S[0] = static_cast(1 / 135) * SQR(CUBE(ONE - di)); // + S[1] = static_cast(3 / 5) - SQR(ONE + di) + + static_cast(5 / 6) * CUBE(ONE + di) - + static_cast(19 / 72) * SQR(SQR(ONE + di)) + + static_cast(13 / 432) * SQR(CUBE(ONE + di)); + S[2] = static_cast(3 / 5) - SQR(di) + + static_cast(5 / 6) * CUBE(di) - + static_cast(19 / 72) * SQR(SQR(di)) + + static_cast(13 / 432) * SQR(CUBE(di)); + S[3] = static_cast(3 / 5) - SQR(ONE - di) + + static_cast(5 / 6) * CUBE(ONE - di) - + static_cast(19 / 72) * SQR(SQR(ONE - di)) + + static_cast(13 / 432) * SQR(CUBE(ONE - di)); + S[5] = static_cast(1 / 135) * SQR(CUBE(di)); + S[3] = ONE - S[0] 
- S[1] - S[2] - S[4]; + } else { // compute at i + 1/2 positions + if (di < HALF) { + i_min = i - 3; + S[0] = static_cast(1 / 135) * SQR(CUBE(HALF - di)); + S[1] = static_cast(3 / 5) - SQR(static_cast(1.5) + di) + + static_cast(5 / 6) * CUBE(static_cast(1.5) + di) - + static_cast(19 / 72) * + SQR(SQR(static_cast(1.5) + di)) + + static_cast(13 / 432) * + SQR(CUBE(static_cast(1.5) + di)); + S[2] = static_cast(3 / 5) - SQR(HALF + di) + + static_cast(5 / 6) * CUBE(HALF + di) - + static_cast(19 / 72) * SQR(SQR(HALF + di)) + + static_cast(13 / 432) * SQR(CUBE(HALF + di)); + S[3] = static_cast(3 / 5) - SQR(HALF - di) + + static_cast(5 / 6) * CUBE(HALF - di) - + static_cast(19 / 72) * SQR(SQR(HALF - di)) + + static_cast(13 / 432) * SQR(CUBE(HALF - di)); + S[5] = static_cast(1 / 135) * SQR(CUBE(HALF + di)); + S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + } else { + i_min = i - 2; + S[0] = static_cast(1 / 135) * + SQR(CUBE(static_cast(1.5) - di)); + S[1] = static_cast(3 / 5) - SQR(HALF + di) + + static_cast(5 / 6) * CUBE(HALF + di) - + static_cast(19 / 72) * SQR(SQR(HALF + di)) + + static_cast(13 / 432) * SQR(CUBE(HALF + di)); + S[2] = static_cast(3 / 5) - SQR(di - HALF) + + static_cast(5 / 6) * CUBE(di - HALF) - + static_cast(19 / 72) * SQR(SQR(di - HALF)) + + static_cast(13 / 432) * SQR(CUBE(di - HALF)); + S[3] = static_cast(3 / 5) - SQR(static_cast(1.5) - di) + + static_cast(5 / 6) * CUBE(static_cast(1.5) - di) - + static_cast(19 / 72) * + SQR(SQR(static_cast(1.5) - di)) + + static_cast(13 / 432) * + SQR(CUBE(static_cast(1.5) - di)); + S[5] = static_cast(1 / 135) * SQR(CUBE(di - HALF)); + S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + } + } // staggered } } - template Inline void for_deposit_2nd(const int& i_init, const real_t& di_init, const int& i_fin, @@ -60,48 +181,110 @@ namespace prtl_shape { real_t& fS_3) { int i_init_min, i_fin_min; - real_t iS_0_, iS_1_, iS_2_; - real_t fS_0_, fS_1_, fS_2_; + real_t iS_[3], fS_[3]; - order_2nd(i_init, di_init, i_init_min, iS_0_, iS_1_, 
iS_2_); - order_2nd(i_fin, di_fin, i_fin_min, fS_0_, fS_1_, fS_2_); + order(i_init, di_init, i_init_min, iS_); + order(i_fin, di_fin, i_fin_min, fS_); if (i_init_min < i_fin_min) { i_min = i_init_min; - iS_0 = iS_0_; - iS_1 = iS_1_; - iS_2 = iS_2_; + iS_0 = iS_[0]; + iS_1 = iS_[1]; + iS_2 = iS_[2]; iS_3 = ZERO; fS_0 = ZERO; - fS_1 = iS_0_; - fS_2 = iS_1_; - fS_3 = iS_2_; + fS_1 = iS_[0]; + fS_2 = iS_[1]; + fS_3 = iS_[2]; } else if (i_init_min > i_fin_min) { i_min = i_fin_min; iS_0 = ZERO; - iS_1 = iS_0_; - iS_2 = iS_1_; - iS_3 = iS_2_; + iS_1 = iS_[0]; + iS_2 = iS_[1]; + iS_3 = iS_[2]; - fS_0 = iS_0_; - fS_1 = iS_1_; - fS_2 = iS_2_; + fS_0 = iS_[0]; + fS_1 = iS_[1]; + fS_2 = iS_[2]; fS_3 = ZERO; } else { i_min = i_init_min; - iS_0 = iS_0_; - iS_1 = iS_1_; - iS_2 = iS_2_; + iS_0 = iS_[0]; + iS_1 = iS_[1]; + iS_2 = iS_[2]; iS_3 = ZERO; - fS_0 = iS_0_; - fS_1 = iS_1_; - fS_2 = iS_2_; + fS_0 = iS_[0]; + fS_1 = iS_[1]; + fS_2 = iS_[2]; fS_3 = ZERO; } } + template + Inline void for_deposit(const int& i_init, + const real_t& di_init, + const int& i_fin, + const real_t& di_fin, + int& i_min, + real_t* iS, + real_t* fS) { + + int i_init_min, i_fin_min; + + real_t iS_[O + 1], fS_[O + 1]; + + order(i_init, di_init, i_init_min, iS_); + order(i_fin, di_fin, i_fin_min, fS_); + + if (i_init_min < i_fin_min) { + i_min = i_init_min; + +#pragma unroll + for (int j = 0; j < O; j++) { + iS[j] = iS_[j]; + } + iS[O + 1] = ZERO; + + fS[0] = ZERO; +#pragma unroll + for (int j = 0; j < O; j++) { + fS[j + 1] = fS_[j]; + } + + } else if (i_init_min > i_fin_min) { + i_min = i_fin_min; + + iS[0] = ZERO; +#pragma unroll + for (int j = 0; j < O; j++) { + iS[j + 1] = iS_[j]; + } + +#pragma unroll + for (int j = 0; j < O; j++) { + fS[j] = fS_[j]; + } + fS[O + 1] = ZERO; + + } else { + i_min = i_init_min; + +#pragma unroll + for (int j = 0; j < O; j++) { + iS[j] = iS_[j]; + } + iS[O + 1] = ZERO; + +#pragma unroll + for (int j = 0; j < O; j++) { + fS[j] = fS_[j]; + } + fS[O + 1] = ZERO; + } + } + } 
// namespace prtl_shape #endif // KERNELS_PARTICLE_SHAPES_HPP From 9f6f318d95395d68337b4ad84f32257f9dfdc896 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 6 Aug 2025 10:57:08 -0500 Subject: [PATCH 052/154] improved comments and cleanup --- src/kernels/currents_deposit.hpp | 249 ------------------------------- src/kernels/particle_shapes.hpp | 118 +++++++++++---- 2 files changed, 86 insertions(+), 281 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 492dec5c1..a4d62aa83 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -46,255 +46,6 @@ namespace kernel { const M metric; const real_t charge, inv_dt; - // Inline void shape_function_2nd(real_t& S0_0, - // real_t& S0_1, - // real_t& S0_2, - // real_t& S0_3, - // real_t& S1_0, - // real_t& S1_1, - // real_t& S1_2, - // real_t& S1_3, - // ncells_t& i_min, - // bool& update_i2, - // const index_t& i, - // const real_t& di, - // const index_t& i_prev, - // const real_t& di_prev) const { - // /* - // Shape function per particle is a 4 element array. 
- // We need to find which indices are contributing to the shape function - // For this we first compute the indices of the particle position - // - // Let * be the particle position at the current timestep - // Let x be the particle position at the previous timestep - // - // - // (-1) 0 1 2 3 - // ___________________________________ - // | | x* | x* | x* | | // shift_i = 0 - // |______|______|______|______|______| - // | | x | x* | x* | * | // shift_i = 1 - // |______|______|______|______|______| - // | * | x* | x* | x | | // shift_i = -1 - // |______|______|______|______|______| - // */ - // - // // find shift in indices - // const int di_less_half = static_cast(di < static_cast(0.5)); - // const int di_prev_less_half = static_cast( - // di_prev < static_cast(0.5)); - // - // const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); - // - // // find the minimum index of the shape function - // i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); - // - // // center index of the shape function - // const auto di_center_prev = static_cast(1 - di_prev_less_half) - - // di_prev; - // const auto di_center = static_cast(1 - di_less_half) - di; - // - // // find indices and define shape function - // if (shift_i == 1) { - // /* - // (-1) 0 1 2 3 - // ___________________________________ - // | | x | x* | x* | * | // shift_i = 1 - // |______|______|______|______|______| - // */ - // update_i2 = true; - // - // S0_0 = HALF * SQR(HALF + di_center_prev); - // S0_1 = THREE_FOURTHS - SQR(di_center_prev); - // S0_2 = HALF * SQR(HALF - di_center_prev); - // S0_3 = ZERO; - // - // S1_0 = ZERO; - // S1_1 = HALF * SQR(HALF + di_center); - // S1_2 = THREE_FOURTHS - SQR(di_center); - // S1_3 = HALF * SQR(HALF - di_center); - // } else if (shift_i == -1) { - // /* - // (-1) 0 1 2 3 - // ___________________________________ - // | * | x* | x* | x | | // shift_i = -1 - // |______|______|______|______|______| - // */ - // update_i2 = true; - // - // S0_0 = 
ZERO; - // S0_1 = HALF * SQR(HALF + di_center_prev); - // S0_2 = THREE_FOURTHS - SQR(di_center_prev); - // S0_3 = HALF * SQR(HALF - di_center_prev); - // - // S1_0 = HALF * SQR(HALF + di_center); - // S1_1 = THREE_FOURTHS - SQR(di_center); - // S1_2 = HALF * SQR(HALF - di_center); - // S1_3 = ZERO; - // - // } else if (shift_i == 0) { - // /* - // (-1) 0 1 2 3 - // ___________________________________ - // | | x* | x* | x* | | // shift_i = 0 - // |______|______|______|______|______| - // */ - // update_i2 = false; - // - // S0_0 = HALF * SQR(HALF + di_center_prev); - // S0_1 = THREE_FOURTHS - SQR(di_center_prev); - // S0_2 = HALF * SQR(HALF - di_center_prev); - // S0_3 = ZERO; - // - // S1_0 = HALF * SQR(HALF + di_center); - // S1_1 = THREE_FOURTHS - SQR(di_center); - // S1_2 = HALF * SQR(HALF - di_center); - // S1_3 = ZERO; - // } else { - // raise::KernelError(HERE, "Invalid shift in indices"); - // } - // - // // account for ghost cells here to shorten J update expression - // i_min += N_GHOSTS; - // } - - Inline void shape_function_3rd(real_t& S0_0, - real_t& S0_1, - real_t& S0_2, - real_t& S0_3, - real_t& S0_4, - real_t& S1_0, - real_t& S1_1, - real_t& S1_2, - real_t& S1_3, - real_t& S1_4, - ncells_t& i_min, - bool& update_i3, - const index_t& i, - const real_t& di, - const index_t& i_prev, - const real_t& di_prev) const { - /* - Shape function per particle is a 4 element array. 
- We need to find which indices are contributing to the shape function - For this we first compute the indices of the particle position - - Let * be the particle position at the current timestep - Let x be the particle position at the previous timestep - - - (-1) 0 1 2 3 4 - __________________________________________ - | | x* | x* | x* | x* | | // shift_i = 0 - |______|______|______|______|______|______| - | | x | x* | x* | x* | * | // shift_i = 1 - |______|______|______|______|______|______| - | * | x* | x* | x* | x | | // shift_i = -1 - |______|______|______|______|______|______| - */ - - // find shift in indices - const int di_less_half = static_cast(di < static_cast(0.5)); - const int di_prev_less_half = static_cast( - di_prev < static_cast(0.5)); - - const int shift_i = (i - di_less_half) - (i_prev - di_prev_less_half); - - // find the minimum index of the shape function - i_min = Kokkos::min((i - di_less_half), (i_prev - di_prev_less_half)); - - // center index of the shape function - const auto di_center_prev = static_cast(1 - di_prev_less_half) - - di_prev; - const auto di_center_prev2 = SQR(di_center_prev); - const auto di_center_prev3 = di_center_prev2 * di_center_prev; - - const auto di_center = static_cast(1 - di_less_half) - di; - const auto di_center2 = SQR(di_center); - const auto di_center3 = di_center2 * di_center; - - // find indices and define shape function - if (shift_i == 1) { - /* - (-1) 0 1 2 3 4 - __________________________________________ - | | x | x* | x* | x* | * | // shift_i = 1 - |______|______|______|______|______|______| - */ - update_i3 = true; - - S0_0 = static_cast(1 / 6) * (ONE - di_center_prev3) - - HALF * (di_center_prev - di_center_prev2); - S0_1 = static_cast(2 / 3) - di_center_prev2 + HALF * di_center_prev3; - S0_2 = static_cast(1 / 6) + - HALF * (di_center_prev + di_center_prev2 - di_center_prev3); - S0_3 = static_cast(1 / 6) * di_center_prev3; - S0_4 = ZERO; - - S1_0 = ZERO; - S1_1 = static_cast(1 / 6) * (ONE - di_center3) 
- - HALF * (di_center - di_center2); - S1_2 = static_cast(2 / 3) - di_center2 + HALF * di_center3; - S1_3 = static_cast(1 / 6) + - HALF * (di_center + di_center2 - di_center3); - S1_4 = static_cast(1 / 6) * di_center3; - } else if (shift_i == -1) { - /* - (-1) 0 1 2 3 4 - _________________________________________ - | * | x* | x* | x* | x | | // shift_i = -1 - |______|______|______|______|______|_____| - */ - update_i3 = true; - - S0_0 = ZERO; - S0_1 = static_cast(1 / 6) * (ONE - di_center_prev3) - - HALF * (di_center_prev - di_center_prev2); - S0_2 = static_cast(2 / 3) - di_center_prev2 + HALF * di_center_prev3; - S0_3 = static_cast(1 / 6) + - HALF * (di_center_prev + di_center_prev2 - di_center_prev3); - S0_4 = static_cast(1 / 6) * di_center_prev3; - - S1_0 = static_cast(1 / 6) * (ONE - di_center3) - - HALF * (di_center - di_center2); - S1_1 = static_cast(2 / 3) - di_center2 + HALF * di_center3; - S1_2 = static_cast(1 / 6) + - HALF * (di_center + di_center2 - di_center3); - S1_3 = static_cast(1 / 6) * di_center3; - S1_4 = ZERO; - - } else if (shift_i == 0) { - /* - (-1) 0 1 2 3 4 - __________________________________________ - | | x* | x* | x* | x* | | // shift_i = 0 - |______|______|______|______|______|______| - */ - update_i3 = false; - - S0_0 = static_cast(1 / 6) * (ONE - di_center_prev3) - - HALF * (di_center_prev - di_center_prev2); - S0_1 = static_cast(2 / 3) - di_center_prev2 + HALF * di_center_prev3; - S0_2 = static_cast(1 / 6) + - HALF * (di_center_prev + di_center_prev2 - di_center_prev3); - S0_3 = static_cast(1 / 6) * di_center_prev3; - S0_4 = ZERO; - - S1_0 = static_cast(1 / 6) * (ONE - di_center3) - - HALF * (di_center - di_center2); - S1_1 = static_cast(2 / 3) - di_center2 + HALF * di_center3; - S1_2 = static_cast(1 / 6) + - HALF * (di_center + di_center2 - di_center3); - S1_3 = static_cast(1 / 6) * di_center3; - S1_4 = ZERO; - } else { - raise::KernelError(HERE, "Invalid shift in indices"); - } - - // account for ghost cells here to shorten J update 
expression - i_min += N_GHOSTS; - } - public: /** * @brief explicit constructor. diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 7a0af8b75..c793ee678 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -20,6 +20,9 @@ namespace prtl_shape { template Inline void order(const int& i, const real_t& di, int& i_min, real_t* S) { if constexpr (O == 2u) { + // 3/4 - |x|^2 |x| < 1/2 + // S(x) = 1/2 * (3/2 - |x|)^2 1/2 ≤ |x| < 3/2 + // 0.0 |x| ≥ 3/2 if constexpr (not STAGGERED) { // compute at i positions if (di < HALF) { i_min = i - 1; @@ -28,7 +31,7 @@ namespace prtl_shape { S[2] = ONE - S[0] - S[1]; } else { i_min = i; - S[0] = HALF * SQR(static_cast(1.5) - di); + S[0] = HALF * SQR(static_cast(3 / 2) - di); S[2] = HALF * SQR(di - HALF); S[1] = ONE - S[0] - S[2]; } @@ -39,25 +42,29 @@ namespace prtl_shape { S[0] = ONE - S[1] - S[2]; } // staggered } else if constexpr (O == 3u) { + // 1/6 * ( 4 - 6 * |x|^2 + 3 * |x|^2) |x| < 1 + // S(x) = 1/6 * ( 2 - |x|)^3 1 ≤ |x| < 2 + // 0.0 |x| ≥ 2 if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; - S[0] = HALF * THIRD * CUBE(ONE - di); - S[3] = HALF * THIRD * CUBE(di); - S[1] = HALF * THIRD * (FOUR - SIX * SQR(di) + THREE * CUBE(di)); - S[2] = ONE - S[0] - S[1] - S[3]; + S[0] = static_cast(1 / 6) * CUBE(ONE - di); + S[3] = static_cast(1 / 6) * CUBE(di); + S[1] = static_cast(1 / 6) * + (FOUR - SIX * SQR(di) + THREE * CUBE(di)); + S[2] = ONE - S[0] - S[1] - S[3]; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 2; - S[0] = HALF * THIRD * CUBE(HALF - di); - S[3] = HALF * THIRD * CUBE(HALF + di); - S[1] = HALF * THIRD * + S[0] = static_cast(1 / 6) * CUBE(HALF - di); + S[3] = static_cast(1 / 6) * CUBE(HALF + di); + S[1] = static_cast(1 / 6) * (FOUR - SIX * SQR(HALF - di) + THREE * CUBE(HALF - di)); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; - S[0] = HALF * THIRD * CUBE(HALF + di); - S[3] = HALF * THIRD * CUBE(HALF + 
di); - S[1] = HALF * THIRD * + S[0] = static_cast(1 / 6) * CUBE(HALF + di); + S[3] = static_cast(1 / 6) * CUBE(HALF + di); + S[1] = static_cast(1 / 6) * (FOUR - SIX * SQR(di - HALF) + THREE * CUBE(di - HALF)); S[2] = ONE - S[0] - S[1] - S[3]; } @@ -69,33 +76,35 @@ namespace prtl_shape { if constexpr (not STAGGERED) { // compute at i positions if (di < HALF) { i_min = i - 2; - S[0] = ONE / (FIVE * FIVE) * SQR(SQR(HALF - di)); - S[4] = ONE / (FIVE * FIVE) * SQR(SQR(HALF + di)); - S[1] = FIVE * INV_8 - SQR(ONE + di) + + S[0] = static_cast(1 / 25) * SQR(SQR(HALF - di)); + S[4] = static_cast(1 / 25) * SQR(SQR(HALF + di)); + S[1] = static_cast(5 / 8) - SQR(ONE + di) + static_cast(32 / 45) * CUBE(ONE + di) - static_cast(98 / 675) * SQR(SQR(ONE + di)); - S[2] = FIVE * INV_8 - SQR(di) + static_cast(32 / 45) * CUBE(di) - + S[2] = static_cast(5 / 8) - SQR(di) + + static_cast(32 / 45) * CUBE(di) - static_cast(98 / 675) * SQR(SQR(di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } else { i_min = i - 1; - S[0] = ONE / (FIVE * FIVE) * SQR(SQR(THREE * HALF - di)); - S[4] = ONE / (FIVE * FIVE) * SQR(SQR(di - HALF)); - S[1] = FIVE * INV_8 - SQR(di) + static_cast(32 / 45) * CUBE(di) - + S[0] = static_cast(1 / 25) * SQR(SQR(THREE * HALF - di)); + S[4] = static_cast(1 / 25) * SQR(SQR(di - HALF)); + S[1] = static_cast(5 / 8) - SQR(di) + + static_cast(32 / 45) * CUBE(di) - static_cast(98 / 675) * SQR(SQR(di)); - S[2] = FIVE * INV_8 - SQR(ONE - di) + + S[2] = static_cast(5 / 8) - SQR(ONE - di) + static_cast(32 / 45) * CUBE(ONE - di) - static_cast(98 / 675) * SQR(SQR(ONE - di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } } else { // compute at i + 1/2 positions i_min = i - 2; - S[0] = ONE / (FIVE * FIVE) * SQR(SQR(ONE - di)); // - S[4] = ONE / (FIVE * FIVE) * SQR(SQR(di)); // - S[1] = FIVE * INV_8 - SQR(HALF + di) + + S[0] = static_cast(1 / 25) * SQR(SQR(ONE - di)); + S[4] = static_cast(1 / 25) * SQR(SQR(di)); + S[1] = static_cast(5 / 8) - SQR(HALF + di) + static_cast(32 / 45) * CUBE(HALF + di) 
- static_cast(98 / 675) * SQR(SQR(HALF + di)); - S[2] = FIVE * INV_8 - SQR(HALF - di) + + S[2] = static_cast(5 / 8) - SQR(HALF - di) + static_cast(32 / 45) * CUBE(HALF - di) - static_cast(98 / 675) * SQR(SQR(HALF - di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; @@ -125,12 +134,14 @@ namespace prtl_shape { if (di < HALF) { i_min = i - 3; S[0] = static_cast(1 / 135) * SQR(CUBE(HALF - di)); - S[1] = static_cast(3 / 5) - SQR(static_cast(1.5) + di) + - static_cast(5 / 6) * CUBE(static_cast(1.5) + di) - + S[1] = static_cast(3 / 5) - + SQR(static_cast(3 / 2) + di) + + static_cast(5 / 6) * + CUBE(static_cast(3 / 2) + di) - static_cast(19 / 72) * - SQR(SQR(static_cast(1.5) + di)) + + SQR(SQR(static_cast(3 / 2) + di)) + static_cast(13 / 432) * - SQR(CUBE(static_cast(1.5) + di)); + SQR(CUBE(static_cast(3 / 2) + di)); S[2] = static_cast(3 / 5) - SQR(HALF + di) + static_cast(5 / 6) * CUBE(HALF + di) - static_cast(19 / 72) * SQR(SQR(HALF + di)) + @@ -144,7 +155,7 @@ namespace prtl_shape { } else { i_min = i - 2; S[0] = static_cast(1 / 135) * - SQR(CUBE(static_cast(1.5) - di)); + SQR(CUBE(static_cast(3 / 2) - di)); S[1] = static_cast(3 / 5) - SQR(HALF + di) + static_cast(5 / 6) * CUBE(HALF + di) - static_cast(19 / 72) * SQR(SQR(HALF + di)) + @@ -153,12 +164,14 @@ namespace prtl_shape { static_cast(5 / 6) * CUBE(di - HALF) - static_cast(19 / 72) * SQR(SQR(di - HALF)) + static_cast(13 / 432) * SQR(CUBE(di - HALF)); - S[3] = static_cast(3 / 5) - SQR(static_cast(1.5) - di) + - static_cast(5 / 6) * CUBE(static_cast(1.5) - di) - + S[3] = static_cast(3 / 5) - + SQR(static_cast(3 / 2) - di) + + static_cast(5 / 6) * + CUBE(static_cast(3 / 2) - di) - static_cast(19 / 72) * - SQR(SQR(static_cast(1.5) - di)) + + SQR(SQR(static_cast(3 / 2) - di)) + static_cast(13 / 432) * - SQR(CUBE(static_cast(1.5) - di)); + SQR(CUBE(static_cast(3 / 2) - di)); S[5] = static_cast(1 / 135) * SQR(CUBE(di - HALF)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } @@ -179,6 +192,27 @@ namespace prtl_shape { real_t& fS_1, 
real_t& fS_2, real_t& fS_3) { + + /* + The second order shape function per particle is a 4 element array + where the shape function contributes to only 3 elements. + We need to find which indices are contributing to the shape function + For this we first compute the indices of the particle position + + Let * be the particle position at the current timestep + Let x be the particle position at the previous timestep + + + 0 1 2 3 + ____________________________ + | x* | x* | x* | | // i_init_min = i_fin_min + |______|______|______|______| + | x | x* | x* | * | // i_init_min < i_fin_min + |______|______|______|______| + | * | x* | x* | x | // i_init_min > i_fin_min + |______|______|______|______| + */ + int i_init_min, i_fin_min; real_t iS_[3], fS_[3]; @@ -231,6 +265,26 @@ namespace prtl_shape { real_t* iS, real_t* fS) { + /* + The N-th order shape function per particle is a N+2 element array + where the shape function contributes to only N+1 elements. + We need to find which indices are contributing to the shape function + For this we first compute the indices of the particle position + + Let * be the particle position at the current timestep + Let x be the particle position at the previous timestep + + + 0 1 (...) N N+1 + ___________________________________ + | x* | x* | ... | x* | | // i_init_min = i_fin_min + |______|______|______|______|______| + | x | x* | ... | x* | * | // i_init_min < i_fin_min + |______|______|______|______|______| + | * | x* | ... 
| x* | x | // i_init_min > i_fin_min + |______|______|______|______|______| + */ + int i_init_min, i_fin_min; real_t iS_[O + 1], fS_[O + 1]; From 12210687806b5462b87f7519b9a5c0e2e84ff803 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 6 Aug 2025 14:21:39 -0500 Subject: [PATCH 053/154] cleanup and updates to generalized version (wip) --- src/kernels/currents_deposit.hpp | 1898 ++++++------------------------ src/kernels/particle_shapes.hpp | 27 +- 2 files changed, 354 insertions(+), 1571 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index a4d62aa83..b363b3819 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -389,1513 +389,249 @@ namespace kernel { cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; } } - } else if constexpr (O == 2u) { - /* - * Higher order charge conserving current deposition based on - * Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract - **/ - - // iS -> shape function for init position - // fS -> shape function for final position - - // shape function at integer points (one coeff is always ZERO) - int i1_min; - real_t iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3; - real_t fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3; - - // clang-format off - prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), - i1(p), static_cast(dx1(p)), - i1_min, - iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, - fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); - // clang-format on - - if constexpr (D == Dim::_1D) { - raise::KernelNotImplementedError(HERE); - } else if constexpr (D == Dim::_2D) { - - // shape function at integer points (one coeff is always ZERO) - int i2_min; - real_t iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3; - real_t fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3; - - // clang-format off - prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), - i2(p), static_cast(dx2(p)), - i2_min, - iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, - fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); - // clang-format on - // 
x1-components - const auto Wx1_00 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_0 + iS_x2_0); - const auto Wx1_01 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_1 + iS_x2_1); - const auto Wx1_02 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_2 + iS_x2_2); - const auto Wx1_03 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_3 + iS_x2_3); - - const auto Wx1_10 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_0 + iS_x2_0); - const auto Wx1_11 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_1 + iS_x2_1); - const auto Wx1_12 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_2 + iS_x2_2); - const auto Wx1_13 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_3 + iS_x2_3); - - const auto Wx1_20 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_0 + iS_x2_0); - const auto Wx1_21 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_1 + iS_x2_1); - const auto Wx1_22 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_2 + iS_x2_2); - const auto Wx1_23 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_3 + iS_x2_3); - - const auto Wx1_30 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_0 + iS_x2_0); - const auto Wx1_31 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_1 + iS_x2_1); - const auto Wx1_32 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_2 + iS_x2_2); - const auto Wx1_33 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_3 + iS_x2_3); - - // x2-components - const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_0 - iS_x2_0); - const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_1 - iS_x2_1); - const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_2 - iS_x2_2); - const auto Wx2_03 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_3 - iS_x2_3); - - const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_0 - iS_x2_0); - const auto Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_1 - iS_x2_1); - const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_2 - iS_x2_2); - const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_3 - iS_x2_3); - - const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_0 - iS_x2_0); - const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_1 - iS_x2_1); - const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_2 - iS_x2_2); - const 
auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_3 - iS_x2_3); - - const auto Wx2_30 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_0 - iS_x2_0); - const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_1 - iS_x2_1); - const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_2 - iS_x2_2); - const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_3 - iS_x2_3); - - // x3-components - const auto Wx3_00 = THIRD * (fS_x2_0 * (HALF * iS_x1_0 + fS_x1_0) + - iS_x2_0 * (HALF * fS_x1_0 + iS_x1_0)); - const auto Wx3_01 = THIRD * (fS_x2_1 * (HALF * iS_x1_0 + fS_x1_0) + - iS_x2_1 * (HALF * fS_x1_0 + iS_x1_0)); - const auto Wx3_02 = THIRD * (fS_x2_2 * (HALF * iS_x1_0 + fS_x1_0) + - iS_x2_2 * (HALF * fS_x1_0 + iS_x1_0)); - const auto Wx3_03 = THIRD * (fS_x2_3 * (HALF * iS_x1_0 + fS_x1_0) + - iS_x2_3 * (HALF * fS_x1_0 + iS_x1_0)); - - const auto Wx3_10 = THIRD * (fS_x2_0 * (HALF * iS_x1_1 + fS_x1_1) + - iS_x2_0 * (HALF * fS_x1_1 + iS_x1_1)); - const auto Wx3_11 = THIRD * (fS_x2_1 * (HALF * iS_x1_1 + fS_x1_1) + - iS_x2_1 * (HALF * fS_x1_1 + iS_x1_1)); - const auto Wx3_12 = THIRD * (fS_x2_2 * (HALF * iS_x1_1 + fS_x1_1) + - iS_x2_2 * (HALF * fS_x1_1 + iS_x1_1)); - const auto Wx3_13 = THIRD * (fS_x2_3 * (HALF * iS_x1_1 + fS_x1_1) + - iS_x2_3 * (HALF * fS_x1_1 + iS_x1_1)); - - const auto Wx3_20 = THIRD * (fS_x2_0 * (HALF * iS_x1_2 + fS_x1_2) + - iS_x2_0 * (HALF * fS_x1_2 + iS_x1_2)); - const auto Wx3_21 = THIRD * (fS_x2_1 * (HALF * iS_x1_2 + fS_x1_2) + - iS_x2_1 * (HALF * fS_x1_2 + iS_x1_2)); - const auto Wx3_22 = THIRD * (fS_x2_2 * (HALF * iS_x1_2 + fS_x1_2) + - iS_x2_2 * (HALF * fS_x1_2 + iS_x1_2)); - const auto Wx3_23 = THIRD * (fS_x2_3 * (HALF * iS_x1_2 + fS_x1_2) + - iS_x2_3 * (HALF * fS_x1_2 + iS_x1_2)); - - const auto Wx3_30 = THIRD * (fS_x2_0 * (HALF * iS_x1_3 + fS_x1_3) + - iS_x2_0 * (HALF * fS_x1_3 + iS_x1_3)); - const auto Wx3_31 = THIRD * (fS_x2_1 * (HALF * iS_x1_3 + fS_x1_3) + - iS_x2_1 * (HALF * fS_x1_3 + iS_x1_3)); - const auto Wx3_32 = THIRD * (fS_x2_2 * (HALF * iS_x1_3 + 
fS_x1_3) + - iS_x2_2 * (HALF * fS_x1_3 + iS_x1_3)); - const auto Wx3_33 = THIRD * (fS_x2_3 * (HALF * iS_x1_3 + fS_x1_3) + - iS_x2_3 * (HALF * fS_x1_3 + iS_x1_3)); - - // x1-component - const auto jx1_00 = Wx1_00; - const auto jx1_10 = jx1_00 + Wx1_10; - const auto jx1_20 = jx1_10 + Wx1_20; - const auto jx1_30 = jx1_20 + Wx1_30; - - const auto jx1_01 = Wx1_01; - const auto jx1_11 = jx1_01 + Wx1_11; - const auto jx1_21 = jx1_11 + Wx1_21; - const auto jx1_31 = jx1_21 + Wx1_31; - - const auto jx1_02 = Wx1_02; - const auto jx1_12 = jx1_02 + Wx1_12; - const auto jx1_22 = jx1_12 + Wx1_22; - const auto jx1_32 = jx1_22 + Wx1_32; - - const auto jx1_03 = Wx1_03; - const auto jx1_13 = jx1_03 + Wx1_13; - const auto jx1_23 = jx1_13 + Wx1_23; - const auto jx1_33 = jx1_23 + Wx1_33; - - // y-component - const auto jx2_00 = Wx2_00; - const auto jx2_01 = jx2_00 + Wx2_01; - const auto jx2_02 = jx2_01 + Wx2_02; - const auto jx2_03 = jx2_02 + Wx2_03; - - const auto jx2_10 = Wx2_10; - const auto jx2_11 = jx2_10 + Wx2_11; - const auto jx2_12 = jx2_11 + Wx2_12; - const auto jx2_13 = jx2_12 + Wx2_13; - - const auto jx2_20 = Wx2_20; - const auto jx2_21 = jx2_20 + Wx2_21; - const auto jx2_22 = jx2_21 + Wx2_22; - const auto jx2_23 = jx2_22 + Wx2_23; - - const auto jx2_30 = Wx2_30; - const auto jx2_31 = jx2_30 + Wx2_31; - const auto jx2_32 = jx2_31 + Wx2_32; - const auto jx2_33 = jx2_32 + Wx2_33; - - i1_min += N_GHOSTS; - i2_min += N_GHOSTS; - - // @TODO: not sure about the signs here - const real_t Qdx1dt = -coeff * inv_dt; - const real_t Qdx2dt = -coeff * inv_dt; - const real_t QVx3 = coeff * vp[2]; - - auto J_acc = J.access(); - - // x1-currents - J_acc(i1_min + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; - J_acc(i1_min + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; - J_acc(i1_min + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; - J_acc(i1_min + 0, i2_min + 3, cur::jx1) += Qdx1dt * jx1_03; - - J_acc(i1_min + 1, i2_min + 0, cur::jx1) += Qdx1dt * jx1_10; - J_acc(i1_min + 1, i2_min + 1, 
cur::jx1) += Qdx1dt * jx1_11; - J_acc(i1_min + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; - J_acc(i1_min + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; - - J_acc(i1_min + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; - J_acc(i1_min + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; - J_acc(i1_min + 2, i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; - J_acc(i1_min + 2, i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; - - J_acc(i1_min + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; - J_acc(i1_min + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; - J_acc(i1_min + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; - J_acc(i1_min + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; - - // x2-currents - J_acc(i1_min + 0, i2_min + 0, cur::jx2) += Qdx2dt * jx2_00; - J_acc(i1_min + 0, i2_min + 1, cur::jx2) += Qdx2dt * jx2_01; - J_acc(i1_min + 0, i2_min + 2, cur::jx2) += Qdx2dt * jx2_02; - J_acc(i1_min + 0, i2_min + 3, cur::jx2) += Qdx2dt * jx2_03; - - J_acc(i1_min + 1, i2_min + 0, cur::jx2) += Qdx2dt * jx2_10; - J_acc(i1_min + 1, i2_min + 1, cur::jx2) += Qdx2dt * jx2_11; - J_acc(i1_min + 1, i2_min + 2, cur::jx2) += Qdx2dt * jx2_12; - J_acc(i1_min + 1, i2_min + 3, cur::jx2) += Qdx2dt * jx2_13; - - J_acc(i1_min + 2, i2_min + 0, cur::jx2) += Qdx2dt * jx2_20; - J_acc(i1_min + 2, i2_min + 1, cur::jx2) += Qdx2dt * jx2_21; - J_acc(i1_min + 2, i2_min + 2, cur::jx2) += Qdx2dt * jx2_22; - J_acc(i1_min + 2, i2_min + 3, cur::jx2) += Qdx2dt * jx2_23; - - J_acc(i1_min + 3, i2_min + 0, cur::jx2) += Qdx2dt * jx2_30; - J_acc(i1_min + 3, i2_min + 1, cur::jx2) += Qdx2dt * jx2_31; - J_acc(i1_min + 3, i2_min + 2, cur::jx2) += Qdx2dt * jx2_32; - J_acc(i1_min + 3, i2_min + 3, cur::jx2) += Qdx2dt * jx2_33; - - // x3-currents - J_acc(i1_min + 0, i2_min + 0, cur::jx3) += QVx3 * Wx3_00; - J_acc(i1_min + 0, i2_min + 1, cur::jx3) += QVx3 * Wx3_01; - J_acc(i1_min + 0, i2_min + 2, cur::jx3) += QVx3 * Wx3_02; - J_acc(i1_min + 0, i2_min + 3, cur::jx3) += QVx3 * Wx3_03; - - J_acc(i1_min + 1, i2_min + 0, cur::jx3) += QVx3 * Wx3_10; - 
J_acc(i1_min + 1, i2_min + 1, cur::jx3) += QVx3 * Wx3_11; - J_acc(i1_min + 1, i2_min + 2, cur::jx3) += QVx3 * Wx3_12; - J_acc(i1_min + 1, i2_min + 3, cur::jx3) += QVx3 * Wx3_13; - - J_acc(i1_min + 2, i2_min + 0, cur::jx3) += QVx3 * Wx3_20; - J_acc(i1_min + 2, i2_min + 1, cur::jx3) += QVx3 * Wx3_21; - J_acc(i1_min + 2, i2_min + 2, cur::jx3) += QVx3 * Wx3_22; - J_acc(i1_min + 2, i2_min + 3, cur::jx3) += QVx3 * Wx3_23; - - J_acc(i1_min + 3, i2_min + 0, cur::jx3) += QVx3 * Wx3_30; - J_acc(i1_min + 3, i2_min + 1, cur::jx3) += QVx3 * Wx3_31; - J_acc(i1_min + 3, i2_min + 2, cur::jx3) += QVx3 * Wx3_32; - J_acc(i1_min + 3, i2_min + 3, cur::jx3) += QVx3 * Wx3_33; - - } else if constexpr (D == Dim::_3D) { - // /* - // y - direction - // */ - // - // // shape function at previous timestep - // real_t S0y_0, S0y_1, S0y_2, S0y_3; - // // shape function at current timestep - // real_t S1y_0, S1y_1, S1y_2, S1y_3; - // // indices of the shape function - // ncells_t iy_min; - // bool update_y2; - // // find indices and define shape function - // // clang-format off - // shape_function_2nd(S0y_0, S0y_1, S0y_2, S0y_3, - // S1y_0, S1y_1, S1y_2, S1y_3, - // iy_min, update_y2, - // i2(p), dx2(p), - // i2_prev(p), dx2_prev(p)); - // // clang-format on - // - // /* - // y - direction - // */ - // - // // shape function at previous timestep - // real_t S0z_0, S0z_1, S0z_2, S0z_3; - // // shape function at current timestep - // real_t S1z_0, S1z_1, S1z_2, S1z_3; - // // indices of the shape function - // ncells_t iz_min; - // bool update_z2; - // // find indices and define shape function - // // clang-format off - // shape_function_2nd(S0z_0, S0z_1, S0z_2, S0z_3, - // S1z_0, S1z_1, S1z_2, S1z_3, - // iz_min, update_z2, - // i3(p), dx3(p), - // i3_prev(p), dx3_prev(p)); - // // clang-format on - // - // // Unrolled calculations for Wx, Wy, and Wz - // // clang-format off - // const auto Wx_0_0_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - // HALF * (S0z_0 * S1y_0 + 
S0y_0 * S1z_0)); - // const auto Wx_0_0_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - // const auto Wx_0_0_2 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - // const auto Wx_0_0_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - // - // const auto Wx_0_1_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - // const auto Wx_0_1_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - // const auto Wx_0_1_2 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - // const auto Wx_0_1_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - // - // const auto Wx_0_2_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - // const auto Wx_0_2_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - // const auto Wx_0_2_2 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - // const auto Wx_0_2_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - // - // const auto Wx_0_3_0 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - // const auto Wx_0_3_1 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - // const auto Wx_0_3_2 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - // const auto 
Wx_0_3_3 = THIRD * (S1x_0 - S0x_0) * - // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - // - // const auto Wx_1_0_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - // const auto Wx_1_0_1 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - // const auto Wx_1_0_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - // const auto Wx_1_0_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - // - // const auto Wx_1_1_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - // const auto Wx_1_1_1 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - // const auto Wx_1_1_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - // const auto Wx_1_1_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - // - // const auto Wx_1_2_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - // const auto Wx_1_2_1 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - // const auto Wx_1_2_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - // const auto Wx_1_2_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_2 * S0z_3 + S1y_2 * S1z_3) + - // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - // - // const auto Wx_1_3_0 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - // const auto Wx_1_3_1 = THIRD * (S1x_1 - 
S0x_1) * - // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - // const auto Wx_1_3_2 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - // const auto Wx_1_3_3 = THIRD * (S1x_1 - S0x_1) * - // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - // - // const auto Wx_2_0_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_0 + S1y_0 * S1z_0) + - // HALF * (S0z_0 * S1y_0 + S0y_0 * S1z_0)); - // const auto Wx_2_0_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_1 + S1y_0 * S1z_1) + - // HALF * (S0z_1 * S1y_0 + S0y_0 * S1z_1)); - // const auto Wx_2_0_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_2 + S1y_0 * S1z_2) + - // HALF * (S0z_2 * S1y_0 + S0y_0 * S1z_2)); - // const auto Wx_2_0_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_0 * S0z_3 + S1y_0 * S1z_3) + - // HALF * (S0z_3 * S1y_0 + S0y_0 * S1z_3)); - // - // const auto Wx_2_1_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_0 + S1y_1 * S1z_0) + - // HALF * (S0z_0 * S1y_1 + S0y_1 * S1z_0)); - // const auto Wx_2_1_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_1 + S1y_1 * S1z_1) + - // HALF * (S0z_1 * S1y_1 + S0y_1 * S1z_1)); - // const auto Wx_2_1_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_2 + S1y_1 * S1z_2) + - // HALF * (S0z_2 * S1y_1 + S0y_1 * S1z_2)); - // const auto Wx_2_1_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_1 * S0z_3 + S1y_1 * S1z_3) + - // HALF * (S0z_3 * S1y_1 + S0y_1 * S1z_3)); - // - // const auto Wx_2_2_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_0 + S1y_2 * S1z_0) + - // HALF * (S0z_0 * S1y_2 + S0y_2 * S1z_0)); - // const auto Wx_2_2_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_1 + S1y_2 * S1z_1) + - // HALF * (S0z_1 * S1y_2 + S0y_2 * S1z_1)); - // const auto Wx_2_2_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_2 + S1y_2 * S1z_2) + - // HALF * (S0z_2 * S1y_2 + S0y_2 * S1z_2)); - // const auto Wx_2_2_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_2 * S0z_3 + S1y_2 
* S1z_3) + - // HALF * (S0z_3 * S1y_2 + S0y_2 * S1z_3)); - // - // const auto Wx_2_3_0 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_0 + S1y_3 * S1z_0) + - // HALF * (S0z_0 * S1y_3 + S0y_3 * S1z_0)); - // const auto Wx_2_3_1 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_1 + S1y_3 * S1z_1) + - // HALF * (S0z_1 * S1y_3 + S0y_3 * S1z_1)); - // const auto Wx_2_3_2 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_2 + S1y_3 * S1z_2) + - // HALF * (S0z_2 * S1y_3 + S0y_3 * S1z_2)); - // const auto Wx_2_3_3 = THIRD * (S1x_2 - S0x_2) * - // ((S0y_3 * S0z_3 + S1y_3 * S1z_3) + - // HALF * (S0z_3 * S1y_3 + S0y_3 * S1z_3)); - // - // const real_t Qdxdt = coeff * inv_dt; - // - // const auto jx_0_0_0 = - Qdxdt * Wx_0_0_0; - // const auto jx_1_0_0 = jx_0_0_0 - Qdxdt * Wx_1_0_0; - // const auto jx_2_0_0 = jx_1_0_0 - Qdxdt * Wx_2_0_0; - // const auto jx_0_1_0 = - Qdxdt * Wx_0_1_0; - // const auto jx_1_1_0 = jx_0_1_0 - Qdxdt * Wx_1_1_0; - // const auto jx_2_1_0 = jx_1_1_0 - Qdxdt * Wx_2_1_0; - // const auto jx_0_2_0 = - Qdxdt * Wx_0_2_0; - // const auto jx_1_2_0 = jx_0_2_0 - Qdxdt * Wx_1_2_0; - // const auto jx_2_2_0 = jx_1_2_0 - Qdxdt * Wx_2_2_0; - // const auto jx_0_3_0 = - Qdxdt * Wx_0_3_0; - // const auto jx_1_3_0 = jx_0_3_0 - Qdxdt * Wx_1_3_0; - // const auto jx_2_3_0 = jx_1_3_0 - Qdxdt * Wx_2_3_0; - // - // const auto jx_0_0_1 = - Qdxdt * Wx_0_0_1; - // const auto jx_1_0_1 = jx_0_0_1 - Qdxdt * Wx_1_0_1; - // const auto jx_2_0_1 = jx_1_0_1 - Qdxdt * Wx_2_0_1; - // const auto jx_0_1_1 = - Qdxdt * Wx_0_1_1; - // const auto jx_1_1_1 = jx_0_1_1 - Qdxdt * Wx_1_1_1; - // const auto jx_2_1_1 = jx_1_1_1 - Qdxdt * Wx_2_1_1; - // const auto jx_0_2_1 = - Qdxdt * Wx_0_2_1; - // const auto jx_1_2_1 = jx_0_2_1 - Qdxdt * Wx_1_2_1; - // const auto jx_2_2_1 = jx_1_2_1 - Qdxdt * Wx_2_2_1; - // const auto jx_0_3_1 = - Qdxdt * Wx_0_3_1; - // const auto jx_1_3_1 = jx_0_3_1 - Qdxdt * Wx_1_3_1; - // const auto jx_2_3_1 = jx_1_3_1 - Qdxdt * Wx_2_3_1; - // - // const auto jx_0_0_2 = - Qdxdt * Wx_0_0_2; 
- // const auto jx_1_0_2 = jx_0_0_2 - Qdxdt * Wx_1_0_2; - // const auto jx_2_0_2 = jx_1_0_2 - Qdxdt * Wx_2_0_2; - // const auto jx_0_1_2 = - Qdxdt * Wx_0_1_2; - // const auto jx_1_1_2 = jx_0_1_2 - Qdxdt * Wx_1_1_2; - // const auto jx_2_1_2 = jx_1_1_2 - Qdxdt * Wx_2_1_2; - // const auto jx_0_2_2 = - Qdxdt * Wx_0_2_2; - // const auto jx_1_2_2 = jx_0_2_2 - Qdxdt * Wx_1_2_2; - // const auto jx_2_2_2 = jx_1_2_2 - Qdxdt * Wx_2_2_2; - // const auto jx_0_3_2 = - Qdxdt * Wx_0_3_2; - // const auto jx_1_3_2 = jx_0_3_2 - Qdxdt * Wx_1_3_2; - // const auto jx_2_3_2 = jx_1_3_2 - Qdxdt * Wx_2_3_2; - // - // const auto jx_0_0_3 = - Qdxdt * Wx_0_0_3; - // const auto jx_1_0_3 = jx_0_0_3 - Qdxdt * Wx_1_0_3; - // const auto jx_2_0_3 = jx_1_0_3 - Qdxdt * Wx_2_0_3; - // const auto jx_0_1_3 = - Qdxdt * Wx_0_1_3; - // const auto jx_1_1_3 = jx_0_1_3 - Qdxdt * Wx_1_1_3; - // const auto jx_2_1_3 = jx_1_1_3 - Qdxdt * Wx_2_1_3; - // const auto jx_0_2_3 = - Qdxdt * Wx_0_2_3; - // const auto jx_1_2_3 = jx_0_2_3 - Qdxdt * Wx_1_2_3; - // const auto jx_2_2_3 = jx_1_2_3 - Qdxdt * Wx_2_2_3; - // const auto jx_0_3_3 = - Qdxdt * Wx_0_3_3; - // const auto jx_1_3_3 = jx_0_3_3 - Qdxdt * Wx_1_3_3; - // const auto jx_2_3_3 = jx_1_3_3 - Qdxdt * Wx_2_3_3; - // - // /* - // y-component - // */ - // const auto Wy_0_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - // const auto Wy_0_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - // const auto Wy_0_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - // const auto Wy_0_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - // - // const auto Wy_0_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - // const auto Wy_0_1_1 = THIRD * (S1y_1 
- S0y_1) * - // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - // const auto Wy_0_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - // const auto Wy_0_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - // - // const auto Wy_0_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_0 + S1x_0 * S1z_0 + - // HALF * (S0z_0 * S1x_0 + S0x_0 * S1z_0)); - // const auto Wy_0_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_1 + S1x_0 * S1z_1 + - // HALF * (S0z_1 * S1x_0 + S0x_0 * S1z_1)); - // const auto Wy_0_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_2 + S1x_0 * S1z_2 + - // HALF * (S0z_2 * S1x_0 + S0x_0 * S1z_2)); - // const auto Wy_0_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_0 * S0z_3 + S1x_0 * S1z_3 + - // HALF * (S0z_3 * S1x_0 + S0x_0 * S1z_3)); - // - // const auto Wy_1_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - // const auto Wy_1_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - // const auto Wy_1_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - // const auto Wy_1_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - // - // const auto Wy_1_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - // const auto Wy_1_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - // const auto Wy_1_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - // const auto Wy_1_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - // HALF * 
(S0z_3 * S1x_1 + S0x_1 * S1z_3)); - // - // const auto Wy_1_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_0 + S1x_1 * S1z_0 + - // HALF * (S0z_0 * S1x_1 + S0x_1 * S1z_0)); - // const auto Wy_1_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_1 + S1x_1 * S1z_1 + - // HALF * (S0z_1 * S1x_1 + S0x_1 * S1z_1)); - // const auto Wy_1_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_2 + S1x_1 * S1z_2 + - // HALF * (S0z_2 * S1x_1 + S0x_1 * S1z_2)); - // const auto Wy_1_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_1 * S0z_3 + S1x_1 * S1z_3 + - // HALF * (S0z_3 * S1x_1 + S0x_1 * S1z_3)); - // - // const auto Wy_2_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_2_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_0 + S1x_2 * S1z_0 + - // HALF * (S0z_0 * S1x_2 + S0x_2 * S1z_0)); - // const auto Wy_2_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_1 + S1x_2 * S1z_1 + - // HALF * (S0z_1 * S1x_2 + S0x_2 * S1z_1)); - // const auto Wy_2_2_2 = 
THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_2 + S1x_2 * S1z_2 + - // HALF * (S0z_2 * S1x_2 + S0x_2 * S1z_2)); - // const auto Wy_2_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_2 * S0z_3 + S1x_2 * S1z_3 + - // HALF * (S0z_3 * S1x_2 + S0x_2 * S1z_3)); - // - // const auto Wy_3_0_0 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_0_1 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_0_2 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_0_3 = THIRD * (S1y_0 - S0y_0) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_1_0 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_1_1 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_1_2 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_1_3 = THIRD * (S1y_1 - S0y_1) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const auto Wy_3_2_0 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_0 + S1x_3 * S1z_0 + - // HALF * (S0z_0 * S1x_3 + S0x_3 * S1z_0)); - // const auto Wy_3_2_1 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_1 + S1x_3 * S1z_1 + - // HALF * (S0z_1 * S1x_3 + S0x_3 * S1z_1)); - // const auto Wy_3_2_2 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_2 + S1x_3 * S1z_2 + - // HALF * (S0z_2 * S1x_3 + S0x_3 * S1z_2)); - // const auto Wy_3_2_3 = THIRD * (S1y_2 - S0y_2) * - // (S0x_3 * S0z_3 + S1x_3 * S1z_3 + - // HALF * (S0z_3 * S1x_3 + S0x_3 * S1z_3)); - // - // const real_t Qdydt = coeff * inv_dt; - // - // const auto jy_0_0_0 = - Qdydt * 
Wy_0_0_0; - // const auto jy_0_1_0 = jy_0_0_0 - Qdydt * Wy_0_1_0; - // const auto jy_0_2_0 = jy_0_1_0 - Qdydt * Wy_0_2_0; - // const auto jy_1_0_0 = - Qdydt * Wy_1_0_0; - // const auto jy_1_1_0 = jy_1_0_0 - Qdydt * Wy_1_1_0; - // const auto jy_1_2_0 = jy_1_1_0 - Qdydt * Wy_1_2_0; - // const auto jy_2_0_0 = - Qdydt * Wy_2_0_0; - // const auto jy_2_1_0 = jy_2_0_0 - Qdydt * Wy_2_1_0; - // const auto jy_2_2_0 = jy_2_1_0 - Qdydt * Wy_2_2_0; - // const auto jy_3_0_0 = - Qdydt * Wy_3_0_0; - // const auto jy_3_1_0 = jy_3_0_0 - Qdydt * Wy_3_1_0; - // const auto jy_3_2_0 = jy_3_1_0 - Qdydt * Wy_3_2_0; - // - // const auto jy_0_0_1 = - Qdydt * Wy_0_0_1; - // const auto jy_0_1_1 = jy_0_0_1 - Qdydt * Wy_0_1_1; - // const auto jy_0_2_1 = jy_0_1_1 - Qdydt * Wy_0_2_1; - // const auto jy_1_0_1 = - Qdydt * Wy_1_0_1; - // const auto jy_1_1_1 = jy_1_0_1 - Qdydt * Wy_1_1_1; - // const auto jy_1_2_1 = jy_1_1_1 - Qdydt * Wy_1_2_1; - // const auto jy_2_0_1 = - Qdydt * Wy_2_0_1; - // const auto jy_2_1_1 = jy_2_0_1 - Qdydt * Wy_2_1_1; - // const auto jy_2_2_1 = jy_2_1_1 - Qdydt * Wy_2_2_1; - // const auto jy_3_0_1 = - Qdydt * Wy_3_0_1; - // const auto jy_3_1_1 = jy_3_0_1 - Qdydt * Wy_3_1_1; - // const auto jy_3_2_1 = jy_3_1_1 - Qdydt * Wy_3_2_1; - // - // const auto jy_0_0_2 = - Qdydt * Wy_0_0_2; - // const auto jy_0_1_2 = jy_0_0_2 - Qdydt * Wy_0_1_2; - // const auto jy_0_2_2 = jy_0_1_2 - Qdydt * Wy_0_2_2; - // const auto jy_1_0_2 = - Qdydt * Wy_1_0_2; - // const auto jy_1_1_2 = jy_1_0_2 - Qdydt * Wy_1_1_2; - // const auto jy_1_2_2 = jy_1_1_2 - Qdydt * Wy_1_2_2; - // const auto jy_2_0_2 = - Qdydt * Wy_2_0_2; - // const auto jy_2_1_2 = jy_2_0_2 - Qdydt * Wy_2_1_2; - // const auto jy_2_2_2 = jy_2_1_2 - Qdydt * Wy_2_2_2; - // const auto jy_3_0_2 = - Qdydt * Wy_3_0_2; - // const auto jy_3_1_2 = jy_3_0_2 - Qdydt * Wy_3_1_2; - // const auto jy_3_2_2 = jy_3_1_2 - Qdydt * Wy_3_2_2; - // - // const auto jy_0_0_3 = - Qdydt * Wy_0_0_3; - // const auto jy_0_1_3 = jy_0_0_3 - Qdydt * Wy_0_1_3; - // const 
auto jy_0_2_3 = jy_0_1_3 - Qdydt * Wy_0_2_3; - // const auto jy_1_0_3 = - Qdydt * Wy_1_0_3; - // const auto jy_1_1_3 = jy_1_0_3 - Qdydt * Wy_1_1_3; - // const auto jy_1_2_3 = jy_1_1_3 - Qdydt * Wy_1_2_3; - // const auto jy_2_0_3 = - Qdydt * Wy_2_0_3; - // const auto jy_2_1_3 = jy_2_0_3 - Qdydt * Wy_2_1_3; - // const auto jy_2_2_3 = jy_2_1_3 - Qdydt * Wy_2_2_3; - // const auto jy_3_0_3 = - Qdydt * Wy_3_0_3; - // const auto jy_3_1_3 = jy_3_0_3 - Qdydt * Wy_3_1_3; - // const auto jy_3_2_3 = jy_3_1_3 - Qdydt * Wy_3_2_3; - // - // /* - // z - component - // */ - // const auto Wz_0_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // const auto Wz_0_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_0 + S1x_0 * S1y_0 + - // HALF * (S0x_0 * S1y_0 + S0y_0 * S1x_0)); - // - // const auto Wz_0_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // const auto Wz_0_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_1 + S1x_0 * S1y_1 + - // HALF * (S0x_0 * S1y_1 + S0y_1 * S1x_0)); - // - // const auto Wz_0_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // const auto Wz_0_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_2 + S1x_0 * S1y_2 + - // HALF * (S0x_0 * S1y_2 + S0y_2 * S1x_0)); - // - // const auto Wz_0_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_1 = THIRD * (S1z_1 - S0z_1) * 
- // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // const auto Wz_0_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_0 * S0y_3 + S1x_0 * S1y_3 + - // HALF * (S0x_0 * S1y_3 + S0y_3 * S1x_0)); - // - // // Unrolled loop for Wz[i][j][k] with i = 1 and interp_order + 2 = 4 - // const auto Wz_1_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // const auto Wz_1_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_0 + S1x_1 * S1y_0 + - // HALF * (S0x_1 * S1y_0 + S0y_0 * S1x_1)); - // - // const auto Wz_1_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // const auto Wz_1_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_1 + S1x_1 * S1y_1 + - // HALF * (S0x_1 * S1y_1 + S0y_1 * S1x_1)); - // - // const auto Wz_1_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // const auto Wz_1_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_2 + S1x_1 * S1y_2 + - // HALF * (S0x_1 * S1y_2 + S0y_2 * S1x_1)); - // - // const auto Wz_1_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // const auto Wz_1_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_1 * S0y_3 + S1x_1 * S1y_3 + - // HALF * (S0x_1 * S1y_3 + S0y_3 * S1x_1)); - // - // // Unrolled loop for 
Wz[i][j][k] with i = 2 and interp_order + 2 = 4 - // const auto Wz_2_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + - // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); - // const auto Wz_2_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + - // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); - // const auto Wz_2_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_2 * S0y_0 + S1x_2 * S1y_0 + - // HALF * (S0x_2 * S1y_0 + S0y_0 * S1x_2)); - // - // const auto Wz_2_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + - // HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); - // const auto Wz_2_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + - // HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); - // const auto Wz_2_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_2 * S0y_1 + S1x_2 * S1y_1 + - // HALF * (S0x_2 * S1y_1 + S0y_1 * S1x_2)); - // - // const auto Wz_2_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + - // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); - // const auto Wz_2_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + - // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); - // const auto Wz_2_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_2 * S0y_2 + S1x_2 * S1y_2 + - // HALF * (S0x_2 * S1y_2 + S0y_2 * S1x_2)); - // - // const auto Wz_2_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + - // HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); - // const auto Wz_2_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + - // HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); - // const auto Wz_2_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_2 * S0y_3 + S1x_2 * S1y_3 + - // HALF * (S0x_2 * S1y_3 + S0y_3 * S1x_2)); - // - // // Unrolled loop for Wz[i][j][k] with i = 3 and interp_order + 2 = 4 - // const auto Wz_3_0_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_0 + 
S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // const auto Wz_3_0_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_0 + S1x_3 * S1y_0 + - // HALF * (S0x_3 * S1y_0 + S0y_0 * S1x_3)); - // - // const auto Wz_3_1_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // const auto Wz_3_1_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // const auto Wz_3_1_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_1 + S1x_3 * S1y_1 + - // HALF * (S0x_3 * S1y_1 + S0y_1 * S1x_3)); - // - // const auto Wz_3_2_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // const auto Wz_3_2_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_2 + S1x_3 * S1y_2 + - // HALF * (S0x_3 * S1y_2 + S0y_2 * S1x_3)); - // - // const auto Wz_3_3_0 = THIRD * (S1z_0 - S0z_0) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_1 = THIRD * (S1z_1 - S0z_1) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // const auto Wz_3_3_2 = THIRD * (S1z_2 - S0z_2) * - // (S0x_3 * S0y_3 + S1x_3 * S1y_3 + - // HALF * (S0x_3 * S1y_3 + S0y_3 * S1x_3)); - // - // const real_t Qdzdt = coeff * inv_dt; - // - // const auto jz_0_0_0 = - Qdzdt * Wz_0_0_0; - // const auto jz_0_0_1 = jz_0_0_0 - Qdzdt * Wz_0_0_1; - // const auto jz_0_0_2 = jz_0_0_1 - Qdzdt * Wz_0_0_2; - // const auto jz_0_1_0 = - Qdzdt * Wz_0_1_0; - // const auto jz_0_1_1 = jz_0_1_0 - Qdzdt * Wz_0_1_1; - // const auto jz_0_1_2 = jz_0_1_1 - Qdzdt * Wz_0_1_2; - // const auto jz_0_2_0 = - Qdzdt * Wz_0_2_0; - // const auto jz_0_2_1 = jz_0_2_0 - Qdzdt * Wz_0_2_1; - // const auto jz_0_2_2 = jz_0_2_1 - Qdzdt * Wz_0_2_2; - // const auto 
jz_0_3_0 = - Qdzdt * Wz_0_3_0; - // const auto jz_0_3_1 = jz_0_3_0 - Qdzdt * Wz_0_3_1; - // const auto jz_0_3_2 = jz_0_3_1 - Qdzdt * Wz_0_3_2; - // - // const auto jz_1_0_0 = - Qdzdt * Wz_1_0_0; - // const auto jz_1_0_1 = jz_1_0_0 - Qdzdt * Wz_1_0_1; - // const auto jz_1_0_2 = jz_1_0_1 - Qdzdt * Wz_1_0_2; - // const auto jz_1_1_0 = - Qdzdt * Wz_1_1_0; - // const auto jz_1_1_1 = jz_1_1_0 - Qdzdt * Wz_1_1_1; - // const auto jz_1_1_2 = jz_1_1_1 - Qdzdt * Wz_1_1_2; - // const auto jz_1_2_0 = - Qdzdt * Wz_1_2_0; - // const auto jz_1_2_1 = jz_1_2_0 - Qdzdt * Wz_1_2_1; - // const auto jz_1_2_2 = jz_1_2_1 - Qdzdt * Wz_1_2_2; - // const auto jz_1_3_0 = - Qdzdt * Wz_1_3_0; - // const auto jz_1_3_1 = jz_1_3_0 - Qdzdt * Wz_1_3_1; - // const auto jz_1_3_2 = jz_1_3_1 - Qdzdt * Wz_1_3_2; - // - // const auto jz_2_0_0 = - Qdzdt * Wz_2_0_0; - // const auto jz_2_0_1 = jz_2_0_0 - Qdzdt * Wz_2_0_1; - // const auto jz_2_0_2 = jz_2_0_1 - Qdzdt * Wz_2_0_2; - // const auto jz_2_1_0 = - Qdzdt * Wz_2_1_0; - // const auto jz_2_1_1 = jz_2_1_0 - Qdzdt * Wz_2_1_1; - // const auto jz_2_1_2 = jz_2_1_1 - Qdzdt * Wz_2_1_2; - // const auto jz_2_2_0 = - Qdzdt * Wz_2_2_0; - // const auto jz_2_2_1 = jz_2_2_0 - Qdzdt * Wz_2_2_1; - // const auto jz_2_2_2 = jz_2_2_1 - Qdzdt * Wz_2_2_2; - // const auto jz_2_3_0 = - Qdzdt * Wz_2_3_0; - // const auto jz_2_3_1 = jz_2_3_0 - Qdzdt * Wz_2_3_1; - // const auto jz_2_3_2 = jz_2_3_1 - Qdzdt * Wz_2_3_2; - // - // const auto jz_3_0_0 = - Qdzdt * Wz_3_0_0; - // const auto jz_3_0_1 = jz_3_0_0 - Qdzdt * Wz_3_0_1; - // const auto jz_3_0_2 = jz_3_0_1 - Qdzdt * Wz_3_0_2; - // const auto jz_3_1_0 = - Qdzdt * Wz_3_1_0; - // const auto jz_3_1_1 = jz_3_1_0 - Qdzdt * Wz_3_1_1; - // const auto jz_3_1_2 = jz_3_1_1 - Qdzdt * Wz_3_1_2; - // const auto jz_3_2_0 = - Qdzdt * Wz_3_2_0; - // const auto jz_3_2_1 = jz_3_2_0 - Qdzdt * Wz_3_2_1; - // const auto jz_3_2_2 = jz_3_2_1 - Qdzdt * Wz_3_2_2; - // const auto jz_3_3_0 = - Qdzdt * Wz_3_3_0; - // const auto jz_3_3_1 = jz_3_3_0 - Qdzdt * 
Wz_3_3_1; - // const auto jz_3_3_2 = jz_3_3_1 - Qdzdt * Wz_3_3_2; - // - // - // /* - // Current update - // */ - // auto J_acc = J.access(); - // - // J_acc(ix_min, iy_min, iz_min, cur::jx1) += jx_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx1) += jx_0_0_1; - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx1) += jx_0_0_2; - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx1) += jx_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx1) += jx_0_1_1; - // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx1) += jx_0_1_2; - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx1) += jx_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx1) += jx_0_2_1; - // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx1) += jx_0_2_2; - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx1) += jx_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx1) += jx_1_0_1; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx1) += jx_1_0_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx1) += jx_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx1) += jx_1_1_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx1) += jx_1_1_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx1) += jx_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx1) += jx_1_2_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx1) += jx_1_2_2; - // - // if (update_x2) - // { - // J_acc(ix_min + 2, iy_min, iz_min, cur::jx1) += jx_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx1) += jx_2_0_1; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx1) += jx_2_0_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx1) += jx_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx1) += jx_2_1_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx1) += jx_2_1_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx1) += jx_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx1) += jx_2_2_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx1) += jx_2_2_2; - // - // if (update_y2) - // { - // 
J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx1) += jx_2_3_0; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx1) += jx_2_3_1; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx1) += jx_2_3_2; - // } - // - // if (update_z2) - // { - // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx1) += jx_2_0_3; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx1) += jx_2_1_3; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx1) += jx_2_2_3; - // - // if (update_y2) - // { - // J_acc(ix_min + 2, iy_min + 3, iz_min + 3, cur::jx1) += jx_2_3_3; - // } - // } - // } - // // - // if (update_y2) - // { - // J_acc(ix_min, iy_min + 3, iz_min, cur::jx1) += jx_0_3_0; - // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx1) += jx_0_3_1; - // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx1) += jx_0_3_2; - // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx1) += jx_1_3_0; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx1) += jx_1_3_1; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx1) += jx_1_3_2; - // } - // - // if (update_z2) - // { - // J_acc(ix_min, iy_min, iz_min + 3, cur::jx1) += jx_0_0_3; - // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx1) += jx_0_1_3; - // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx1) += jx_0_2_3; - // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx1) += jx_1_0_3; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx1) += jx_1_1_3; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx1) += jx_1_2_3; - // - // if (update_y2) - // { - // J_acc(ix_min, iy_min + 3, iz_min + 3, cur::jx1) += jx_0_3_3; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 3, cur::jx1) += jx_1_3_3; - // } - // } - // - // - // /* - // y-component - // */ - // J_acc(ix_min, iy_min, iz_min, cur::jx2) += jy_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx2) += jy_0_0_1; - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx2) += jy_0_0_2; - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx2) += jy_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx2) += jy_0_1_1; - // J_acc(ix_min, 
iy_min + 1, iz_min + 2, cur::jx2) += jy_0_1_2; - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx2) += jy_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx2) += jy_1_0_1; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx2) += jy_1_0_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx2) += jy_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx2) += jy_1_1_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx2) += jy_1_1_2; - // J_acc(ix_min + 2, iy_min, iz_min, cur::jx2) += jy_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx2) += jy_2_0_1; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx2) += jy_2_0_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx2) += jy_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx2) += jy_2_1_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx2) += jy_2_1_2; - // - // if (update_x2) - // { - // J_acc(ix_min + 3, iy_min, iz_min, cur::jx2) += jy_3_0_0; - // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx2) += jy_3_0_1; - // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx2) += jy_3_0_2; - // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx2) += jy_3_1_0; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx2) += jy_3_1_1; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx2) += jy_3_1_2; - // - // if (update_z2) - // { - // J_acc(ix_min + 3, iy_min, iz_min + 3, cur::jx2) += jy_3_0_3; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 3, cur::jx2) += jy_3_1_3; - // } - // } - // - // if (update_y2) - // { - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx2) += jy_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx2) += jy_0_2_1; - // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx2) += jy_0_2_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx2) += jy_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx2) += jy_1_2_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx2) += jy_1_2_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx2) += jy_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, 
iz_min + 1, cur::jx2) += jy_2_2_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, cur::jx2) += jy_2_2_2; - // - // if (update_x2) - // { - // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx2) += jy_3_2_0; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx2) += jy_3_2_1; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx2) += jy_3_2_2; - // - // if (update_z2) - // { - // J_acc(ix_min + 2, iy_min + 2, iz_min + 3, cur::jx2) += jy_2_2_3; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 3, cur::jx2) += jy_3_2_3; - // } - // } - // - // if (update_z2) - // { - // J_acc(ix_min, iy_min + 2, iz_min + 3, cur::jx2) += jy_0_2_3; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 3, cur::jx2) += jy_1_2_3; - // } - // } - // - // if (update_z2) - // { - // J_acc(ix_min, iy_min, iz_min + 3, cur::jx2) += jy_0_0_3; - // J_acc(ix_min, iy_min + 1, iz_min + 3, cur::jx2) += jy_0_1_3; - // J_acc(ix_min + 1, iy_min, iz_min + 3, cur::jx2) += jy_1_0_3; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 3, cur::jx2) += jy_1_1_3; - // J_acc(ix_min + 2, iy_min, iz_min + 3, cur::jx2) += jy_2_0_3; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 3, cur::jx2) += jy_2_1_3; - // } - // - // /* - // z-component - // */ - // J_acc(ix_min, iy_min, iz_min, cur::jx3) += jz_0_0_0; - // J_acc(ix_min, iy_min, iz_min + 1, cur::jx3) += jz_0_0_1; - // J_acc(ix_min, iy_min + 1, iz_min, cur::jx3) += jz_0_1_0; - // J_acc(ix_min, iy_min + 1, iz_min + 1, cur::jx3) += jz_0_1_1; - // J_acc(ix_min, iy_min + 2, iz_min, cur::jx3) += jz_0_2_0; - // J_acc(ix_min, iy_min + 2, iz_min + 1, cur::jx3) += jz_0_2_1; - // J_acc(ix_min + 1, iy_min, iz_min, cur::jx3) += jz_1_0_0; - // J_acc(ix_min + 1, iy_min, iz_min + 1, cur::jx3) += jz_1_0_1; - // J_acc(ix_min + 1, iy_min + 1, iz_min, cur::jx3) += jz_1_1_0; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 1, cur::jx3) += jz_1_1_1; - // J_acc(ix_min + 1, iy_min + 2, iz_min, cur::jx3) += jz_1_2_0; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 1, cur::jx3) += jz_1_2_1; - // J_acc(ix_min + 2, iy_min, 
iz_min, cur::jx3) += jz_2_0_0; - // J_acc(ix_min + 2, iy_min, iz_min + 1, cur::jx3) += jz_2_0_1; - // J_acc(ix_min + 2, iy_min + 1, iz_min, cur::jx3) += jz_2_1_0; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 1, cur::jx3) += jz_2_1_1; - // J_acc(ix_min + 2, iy_min + 2, iz_min, cur::jx3) += jz_2_2_0; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 1, cur::jx3) += jz_2_2_1; - // - // if (update_x2) - // { - // J_acc(ix_min + 3, iy_min, iz_min, cur::jx3) += jz_3_0_0; - // J_acc(ix_min + 3, iy_min, iz_min + 1, cur::jx3) += jz_3_0_1; - // J_acc(ix_min + 3, iy_min + 1, iz_min, cur::jx3) += jz_3_1_0; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 1, cur::jx3) += jz_3_1_1; - // J_acc(ix_min + 3, iy_min + 2, iz_min, cur::jx3) += jz_3_2_0; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 1, cur::jx3) += jz_3_2_1; - // J_acc(ix_min + 3, iy_min + 3, iz_min, cur::jx3) += jz_3_3_0; - // J_acc(ix_min + 3, iy_min + 3, iz_min + 1, cur::jx3) += jz_3_3_1; - // } - // - // if (update_y2) - // { - // J_acc(ix_min, iy_min + 3, iz_min, cur::jx3) += jz_0_3_0; - // J_acc(ix_min, iy_min + 3, iz_min + 1, cur::jx3) += jz_0_3_1; - // J_acc(ix_min + 1, iy_min + 3, iz_min, cur::jx3) += jz_1_3_0; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 1, cur::jx3) += jz_1_3_1; - // J_acc(ix_min + 2, iy_min + 3, iz_min, cur::jx3) += jz_2_3_0; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 1, cur::jx3) += jz_2_3_1; - // } - // - // if (update_z2) - // { - // J_acc(ix_min, iy_min, iz_min + 2, cur::jx3) += jz_0_0_2; - // J_acc(ix_min, iy_min + 1, iz_min + 2, cur::jx3) += jz_0_1_2; - // J_acc(ix_min, iy_min + 2, iz_min + 2, cur::jx3) += jz_0_2_2; - // J_acc(ix_min + 1, iy_min, iz_min + 2, cur::jx3) += jz_1_0_2; - // J_acc(ix_min + 1, iy_min + 1, iz_min + 2, cur::jx3) += jz_1_1_2; - // J_acc(ix_min + 1, iy_min + 2, iz_min + 2, cur::jx3) += jz_1_2_2; - // J_acc(ix_min + 2, iy_min, iz_min + 2, cur::jx3) += jz_2_0_2; - // J_acc(ix_min + 2, iy_min + 1, iz_min + 2, cur::jx3) += jz_2_1_2; - // J_acc(ix_min + 2, iy_min + 2, iz_min + 2, 
cur::jx3) += jz_2_2_2; - // - // if (update_x2) - // { - // J_acc(ix_min + 3, iy_min, iz_min + 2, cur::jx3) += jz_3_0_2; - // J_acc(ix_min + 3, iy_min + 1, iz_min + 2, cur::jx3) += jz_3_1_2; - // J_acc(ix_min + 3, iy_min + 2, iz_min + 2, cur::jx3) += jz_3_2_2; - // - // if (update_y2) - // { - // J_acc(ix_min + 3, iy_min + 3, iz_min + 2, cur::jx3) += jz_3_3_2; - // } - // } - // - // if (update_y2) - // { - // J_acc(ix_min, iy_min + 3, iz_min + 2, cur::jx3) += jz_0_3_2; - // J_acc(ix_min + 1, iy_min + 3, iz_min + 2, cur::jx3) += jz_1_3_2; - // J_acc(ix_min + 2, iy_min + 3, iz_min + 2, cur::jx3) += jz_2_3_2; - // } - // } - // clang-format on - } // dimension - - } else if constexpr (O == 3u) { - /* - Higher order charge conserving current deposition based on - Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract - - We need to define the follwowing variable: - - Shape functions in spatial directions for the particle position - before and after the current timestep. 
- S0_*, S1_* - - Density composition matrix - Wx_*, Wy_*, Wz_* - */ - - /* - x - direction - */ - - // shape function at previous timestep - real_t S0x_0, S0x_1, S0x_2, S0x_3, S0x_4; - // shape function at current timestep - real_t S1x_0, S1x_1, S1x_2, S1x_3, S1x_4; - // indices of the shape function - ncells_t ix_min; - bool update_x3; - // find indices and define shape function - // clang-format off - shape_function_3rd(S0x_0, S0x_1, S0x_2, S0x_3, S0x_4, - S1x_0, S1x_1, S1x_2, S1x_3, S1x_4, - ix_min, update_x3, - i1(p), dx1(p), - i1_prev(p), dx1_prev(p)); - // clang-format on - - if constexpr (D == Dim::_1D) { - // ToDo - } else if constexpr (D == Dim::_2D) { - - /* - y - direction - */ - - // shape function at previous timestep - real_t S0y_0, S0y_1, S0y_2, S0y_3, S0y_4; - // shape function at current timestep - real_t S1y_0, S1y_1, S1y_2, S1y_3, S1y_4; - // indices of the shape function - ncells_t iy_min; - bool update_y3; - // find indices and define shape function - // clang-format off - shape_function_3rd(S0y_0, S0y_1, S0y_2, S0y_3, S0y_4, - S1y_0, S1y_1, S1y_2, S1y_3, S1y_4, - iy_min, update_y3, - i2(p), dx2(p), - i2_prev(p), dx2_prev(p)); - // clang-format on - - // Esirkepov 2001, Eq. 
38 - /* - x - component - */ - // Calculate weight function - unrolled - const auto Wx_0_0 = HALF * (S1x_0 - S0x_0) * (S0y_0 + S1y_0); - const auto Wx_0_1 = HALF * (S1x_0 - S0x_0) * (S0y_1 + S1y_1); - const auto Wx_0_2 = HALF * (S1x_0 - S0x_0) * (S0y_2 + S1y_2); - const auto Wx_0_3 = HALF * (S1x_0 - S0x_0) * (S0y_3 + S1y_3); - const auto Wx_0_4 = HALF * (S1x_0 - S0x_0) * (S0y_4 + S1y_4); - - const auto Wx_1_0 = HALF * (S1x_1 - S0x_1) * (S0y_0 + S1y_0); - const auto Wx_1_1 = HALF * (S1x_1 - S0x_1) * (S0y_1 + S1y_1); - const auto Wx_1_2 = HALF * (S1x_1 - S0x_1) * (S0y_2 + S1y_2); - const auto Wx_1_3 = HALF * (S1x_1 - S0x_1) * (S0y_3 + S1y_3); - const auto Wx_1_4 = HALF * (S1x_1 - S0x_1) * (S0y_4 + S1y_4); - - const auto Wx_2_0 = HALF * (S1x_2 - S0x_2) * (S0y_0 + S1y_0); - const auto Wx_2_1 = HALF * (S1x_2 - S0x_2) * (S0y_1 + S1y_1); - const auto Wx_2_2 = HALF * (S1x_2 - S0x_2) * (S0y_2 + S1y_2); - const auto Wx_2_3 = HALF * (S1x_2 - S0x_2) * (S0y_3 + S1y_3); - const auto Wx_2_4 = HALF * (S1x_2 - S0x_2) * (S0y_4 + S1y_4); - - const auto Wx_3_0 = HALF * (S1x_3 - S0x_3) * (S0y_0 + S1y_0); - const auto Wx_3_1 = HALF * (S1x_3 - S0x_3) * (S0y_1 + S1y_1); - const auto Wx_3_2 = HALF * (S1x_3 - S0x_3) * (S0y_2 + S1y_2); - const auto Wx_3_3 = HALF * (S1x_3 - S0x_3) * (S0y_3 + S1y_3); - const auto Wx_3_4 = HALF * (S1x_3 - S0x_3) * (S0y_4 + S1y_4); - - // Unrolled calculations for Wy - const auto Wy_0_0 = HALF * (S1x_0 + S0x_0) * (S1y_0 - S0y_0); - const auto Wy_0_1 = HALF * (S1x_0 + S0x_0) * (S1y_1 - S0y_1); - const auto Wy_0_2 = HALF * (S1x_0 + S0x_0) * (S1y_2 - S0y_2); - const auto Wy_0_3 = HALF * (S1x_0 + S0x_0) * (S1y_3 - S0y_3); - - const auto Wy_1_0 = HALF * (S1x_1 + S0x_1) * (S1y_0 - S0y_0); - const auto Wy_1_1 = HALF * (S1x_1 + S0x_1) * (S1y_1 - S0y_1); - const auto Wy_1_2 = HALF * (S1x_1 + S0x_1) * (S1y_2 - S0y_2); - const auto Wy_1_3 = HALF * (S1x_1 + S0x_1) * (S1y_3 - S0y_3); - - const auto Wy_2_0 = HALF * (S1x_2 + S0x_2) * (S1y_0 - S0y_0); - const auto Wy_2_1 = HALF 
* (S1x_2 + S0x_2) * (S1y_1 - S0y_1); - const auto Wy_2_2 = HALF * (S1x_2 + S0x_2) * (S1y_2 - S0y_2); - const auto Wy_2_3 = HALF * (S1x_2 + S0x_2) * (S1y_3 - S0y_3); - - const auto Wy_3_0 = HALF * (S1x_3 + S0x_3) * (S1y_0 - S0y_0); - const auto Wy_3_1 = HALF * (S1x_3 + S0x_3) * (S1y_1 - S0y_1); - const auto Wy_3_2 = HALF * (S1x_3 + S0x_3) * (S1y_2 - S0y_2); - const auto Wy_3_3 = HALF * (S1x_3 + S0x_3) * (S1y_3 - S0y_3); - - const auto Wy_4_0 = HALF * (S1x_4 + S0x_4) * (S1y_0 - S0y_0); - const auto Wy_4_1 = HALF * (S1x_4 + S0x_4) * (S1y_1 - S0y_1); - const auto Wy_4_2 = HALF * (S1x_4 + S0x_4) * (S1y_2 - S0y_2); - const auto Wy_4_3 = HALF * (S1x_4 + S0x_4) * (S1y_3 - S0y_3); - - // Unrolled calculations for Wz - const auto Wz_0_0 = THIRD * (S1y_0 * (HALF * S0x_0 + S1x_0) + - S0y_0 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_1 = THIRD * (S1y_1 * (HALF * S0x_0 + S1x_0) + - S0y_1 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_2 = THIRD * (S1y_2 * (HALF * S0x_0 + S1x_0) + - S0y_2 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_3 = THIRD * (S1y_3 * (HALF * S0x_0 + S1x_0) + - S0y_3 * (HALF * S1x_0 + S0x_0)); - const auto Wz_0_4 = THIRD * (S1y_4 * (HALF * S0x_0 + S1x_0) + - S0y_4 * (HALF * S1x_0 + S0x_0)); - - const auto Wz_1_0 = THIRD * (S1y_0 * (HALF * S0x_1 + S1x_1) + - S0y_0 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_1 = THIRD * (S1y_1 * (HALF * S0x_1 + S1x_1) + - S0y_1 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_2 = THIRD * (S1y_2 * (HALF * S0x_1 + S1x_1) + - S0y_2 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_3 = THIRD * (S1y_3 * (HALF * S0x_1 + S1x_1) + - S0y_3 * (HALF * S1x_1 + S0x_1)); - const auto Wz_1_4 = THIRD * (S1y_4 * (HALF * S0x_1 + S1x_1) + - S0y_4 * (HALF * S1x_1 + S0x_1)); - - const auto Wz_2_0 = THIRD * (S1y_0 * (HALF * S0x_2 + S1x_2) + - S0y_0 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_1 = THIRD * (S1y_1 * (HALF * S0x_2 + S1x_2) + - S0y_1 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_2 = THIRD * (S1y_2 * (HALF * S0x_2 + S1x_2) + - S0y_2 * (HALF * S1x_2 + 
S0x_2)); - const auto Wz_2_3 = THIRD * (S1y_3 * (HALF * S0x_2 + S1x_2) + - S0y_3 * (HALF * S1x_2 + S0x_2)); - const auto Wz_2_4 = THIRD * (S1y_4 * (HALF * S0x_2 + S1x_2) + - S0y_4 * (HALF * S1x_2 + S0x_2)); - - const auto Wz_3_0 = THIRD * (S1y_0 * (HALF * S0x_3 + S1x_3) + - S0y_0 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_1 = THIRD * (S1y_1 * (HALF * S0x_3 + S1x_3) + - S0y_1 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_2 = THIRD * (S1y_2 * (HALF * S0x_3 + S1x_3) + - S0y_2 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_3 = THIRD * (S1y_3 * (HALF * S0x_3 + S1x_3) + - S0y_3 * (HALF * S1x_3 + S0x_3)); - const auto Wz_3_4 = THIRD * (S1y_4 * (HALF * S0x_3 + S1x_3) + - S0y_4 * (HALF * S1x_3 + S0x_3)); - - const auto Wz_4_0 = THIRD * (S1y_0 * (HALF * S0x_4 + S1x_4) + - S0y_0 * (HALF * S1x_4 + S0x_4)); - const auto Wz_4_1 = THIRD * (S1y_1 * (HALF * S0x_4 + S1x_4) + - S0y_1 * (HALF * S1x_4 + S0x_4)); - const auto Wz_4_2 = THIRD * (S1y_2 * (HALF * S0x_4 + S1x_4) + - S0y_2 * (HALF * S1x_4 + S0x_4)); - const auto Wz_4_3 = THIRD * (S1y_3 * (HALF * S0x_4 + S1x_4) + - S0y_3 * (HALF * S1x_4 + S0x_4)); - const auto Wz_4_4 = THIRD * (S1y_4 * (HALF * S0x_4 + S1x_4) + - S0y_4 * (HALF * S1x_4 + S0x_4)); - - const real_t Qdxdt = coeff * inv_dt; - const real_t Qdydt = coeff * inv_dt; - const real_t QVz = coeff * inv_dt * vp[2]; - - // Esirkepov - Eq. 
39 - // x-component - const auto jx_0_0 = -Qdxdt * Wx_0_0; - const auto jx_1_0 = jx_0_0 - Qdxdt * Wx_1_0; - const auto jx_2_0 = jx_1_0 - Qdxdt * Wx_2_0; - const auto jx_3_0 = jx_2_0 - Qdxdt * Wx_3_0; - - const auto jx_0_1 = -Qdxdt * Wx_0_1; - const auto jx_1_1 = jx_0_1 - Qdxdt * Wx_1_1; - const auto jx_2_1 = jx_1_1 - Qdxdt * Wx_2_1; - const auto jx_3_1 = jx_2_1 - Qdxdt * Wx_3_1; - - const auto jx_0_2 = -Qdxdt * Wx_0_2; - const auto jx_1_2 = jx_0_2 - Qdxdt * Wx_1_2; - const auto jx_2_2 = jx_1_2 - Qdxdt * Wx_2_2; - const auto jx_3_2 = jx_2_2 - Qdxdt * Wx_3_2; - - const auto jx_0_3 = -Qdxdt * Wx_0_3; - const auto jx_1_3 = jx_0_3 - Qdxdt * Wx_1_3; - const auto jx_2_3 = jx_1_3 - Qdxdt * Wx_2_3; - const auto jx_3_3 = jx_2_3 - Qdxdt * Wx_3_3; - - const auto jx_0_4 = -Qdxdt * Wx_0_4; - const auto jx_1_4 = jx_0_4 - Qdxdt * Wx_1_4; - const auto jx_2_4 = jx_1_4 - Qdxdt * Wx_2_4; - const auto jx_3_4 = jx_2_4 - Qdxdt * Wx_3_4; - - // y-component - const auto jy_0_0 = -Qdydt * Wy_0_0; - const auto jy_0_1 = jy_0_0 - Qdydt * Wy_0_1; - const auto jy_0_2 = jy_0_1 - Qdydt * Wy_0_2; - const auto jy_0_3 = jy_0_2 - Qdydt * Wy_0_3; - - const auto jy_1_0 = -Qdydt * Wy_1_0; - const auto jy_1_1 = jy_1_0 - Qdydt * Wy_1_1; - const auto jy_1_2 = jy_1_1 - Qdydt * Wy_1_2; - const auto jy_1_3 = jy_1_2 - Qdydt * Wy_1_3; - - const auto jy_2_0 = -Qdydt * Wy_2_0; - const auto jy_2_1 = jy_2_0 - Qdydt * Wy_2_1; - const auto jy_2_2 = jy_2_1 - Qdydt * Wy_2_2; - const auto jy_2_3 = jy_2_2 - Qdydt * Wy_2_3; - - const auto jy_3_0 = -Qdydt * Wy_3_0; - const auto jy_3_1 = jy_3_0 - Qdydt * Wy_3_1; - const auto jy_3_2 = jy_3_1 - Qdydt * Wy_3_2; - const auto jy_3_3 = jy_3_2 - Qdydt * Wy_3_3; - - const auto jy_4_0 = -Qdydt * Wy_4_0; - const auto jy_4_1 = jy_4_0 - Qdydt * Wy_4_1; - const auto jy_4_2 = jy_4_1 - Qdydt * Wy_4_2; - const auto jy_4_3 = jy_4_2 - Qdydt * Wy_4_3; - - /* - Current update - */ - auto J_acc = J.access(); - - /* - x - component - */ - J_acc(ix_min, iy_min, cur::jx1) += jx_0_0; - J_acc(ix_min, 
iy_min + 1, cur::jx1) += jx_0_1; - J_acc(ix_min, iy_min + 2, cur::jx1) += jx_0_2; - J_acc(ix_min, iy_min + 3, cur::jx1) += jx_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx1) += jx_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx1) += jx_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx1) += jx_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx1) += jx_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx1) += jx_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx1) += jx_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx1) += jx_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx1) += jx_2_3; - - if (update_x3) { - J_acc(ix_min + 3, iy_min, cur::jx1) += jx_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx1) += jx_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx1) += jx_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx1) += jx_3_3; - } - - if (update_y3) { - J_acc(ix_min, iy_min + 4, cur::jx1) += jx_0_4; - J_acc(ix_min + 1, iy_min + 4, cur::jx1) += jx_1_4; - J_acc(ix_min + 2, iy_min + 4, cur::jx1) += jx_2_4; - } - - if (update_x3 && update_y3) { - J_acc(ix_min + 3, iy_min + 4, cur::jx1) += jx_3_4; - } - - /* - y - component - */ - J_acc(ix_min, iy_min, cur::jx2) += jy_0_0; - J_acc(ix_min + 1, iy_min, cur::jx2) += jy_1_0; - J_acc(ix_min + 2, iy_min, cur::jx2) += jy_2_0; - J_acc(ix_min + 3, iy_min, cur::jx2) += jy_3_0; - - J_acc(ix_min, iy_min + 1, cur::jx2) += jy_0_1; - J_acc(ix_min + 1, iy_min + 1, cur::jx2) += jy_1_1; - J_acc(ix_min + 2, iy_min + 1, cur::jx2) += jy_2_1; - J_acc(ix_min + 3, iy_min + 1, cur::jx2) += jy_3_1; - - J_acc(ix_min, iy_min + 2, cur::jx2) += jy_0_2; - J_acc(ix_min + 1, iy_min + 2, cur::jx2) += jy_1_2; - J_acc(ix_min + 2, iy_min + 2, cur::jx2) += jy_2_2; - J_acc(ix_min + 3, iy_min + 2, cur::jx2) += jy_3_2; - - if (update_x3) { - J_acc(ix_min + 4, iy_min, cur::jx2) += jy_4_0; - J_acc(ix_min + 4, iy_min + 1, cur::jx2) += jy_4_1; - J_acc(ix_min + 4, iy_min + 2, cur::jx2) += jy_4_2; - } - - if (update_y3) { - J_acc(ix_min, iy_min + 3, cur::jx2) += jy_0_3; - J_acc(ix_min + 1, iy_min + 3, cur::jx2) += jy_1_3; - 
J_acc(ix_min + 2, iy_min + 3, cur::jx2) += jy_2_3; - J_acc(ix_min + 3, iy_min + 3, cur::jx2) += jy_3_3; - } - - if (update_x3 && update_y3) { - J_acc(ix_min + 4, iy_min + 3, cur::jx2) += jy_4_3; - } - /* - z - component, simulated direction - */ - J_acc(ix_min, iy_min, cur::jx3) += QVz * Wz_0_0; - J_acc(ix_min, iy_min + 1, cur::jx3) += QVz * Wz_0_1; - J_acc(ix_min, iy_min + 2, cur::jx3) += QVz * Wz_0_2; - J_acc(ix_min, iy_min + 3, cur::jx3) += QVz * Wz_0_3; - - J_acc(ix_min + 1, iy_min, cur::jx3) += QVz * Wz_1_0; - J_acc(ix_min + 1, iy_min + 1, cur::jx3) += QVz * Wz_1_1; - J_acc(ix_min + 1, iy_min + 2, cur::jx3) += QVz * Wz_1_2; - J_acc(ix_min + 1, iy_min + 3, cur::jx3) += QVz * Wz_1_3; - - J_acc(ix_min + 2, iy_min, cur::jx3) += QVz * Wz_2_0; - J_acc(ix_min + 2, iy_min + 1, cur::jx3) += QVz * Wz_2_1; - J_acc(ix_min + 2, iy_min + 2, cur::jx3) += QVz * Wz_2_2; - J_acc(ix_min + 2, iy_min + 3, cur::jx3) += QVz * Wz_2_3; - - J_acc(ix_min + 3, iy_min, cur::jx3) += QVz * Wz_3_0; - J_acc(ix_min + 3, iy_min + 1, cur::jx3) += QVz * Wz_3_1; - J_acc(ix_min + 3, iy_min + 2, cur::jx3) += QVz * Wz_3_2; - J_acc(ix_min + 3, iy_min + 3, cur::jx3) += QVz * Wz_3_3; - - if (update_x3) { - J_acc(ix_min + 4, iy_min, cur::jx3) += QVz * Wz_4_0; - J_acc(ix_min + 4, iy_min + 1, cur::jx3) += QVz * Wz_4_1; - J_acc(ix_min + 4, iy_min + 2, cur::jx3) += QVz * Wz_4_2; - J_acc(ix_min + 4, iy_min + 3, cur::jx3) += QVz * Wz_4_3; - } - - if (update_y3) { - J_acc(ix_min, iy_min + 4, cur::jx3) += QVz * Wz_0_4; - J_acc(ix_min + 1, iy_min + 4, cur::jx3) += QVz * Wz_1_4; - J_acc(ix_min + 2, iy_min + 4, cur::jx3) += QVz * Wz_2_4; - J_acc(ix_min + 3, iy_min + 4, cur::jx3) += QVz * Wz_3_4; - } - if (update_x3 && update_y3) { - J_acc(ix_min + 4, iy_min + 4, cur::jx3) += QVz * Wz_4_4; - } - - } // dim -> ToDo: 3D! 
- } else if constexpr ((O > 3u) && (O < 5u)) { + // } else if constexpr (O == 2u) { + // /* + // * Higher order charge conserving current deposition based on + // * Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract + // **/ + + // // iS -> shape function for init position + // // fS -> shape function for final position + + // // shape function at integer points (one coeff is always ZERO) + // int i1_min; + // real_t iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3; + // real_t fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3; + + // // clang-format off + // prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), + // i1(p), static_cast(dx1(p)), + // i1_min, + // iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, + // fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); + // // clang-format on + + // if constexpr (D == Dim::_1D) { + // raise::KernelNotImplementedError(HERE); + // } else if constexpr (D == Dim::_2D) { + + // // shape function at integer points (one coeff is always ZERO) + // int i2_min; + // real_t iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3; + // real_t fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3; + + // // clang-format off + // prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), + // i2(p), static_cast(dx2(p)), + // i2_min, + // iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, + // fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); + // // clang-format on + // // x1-components + // const auto Wx1_00 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_0 + iS_x2_0); + // const auto Wx1_01 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_1 + iS_x2_1); + // const auto Wx1_02 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_2 + iS_x2_2); + // const auto Wx1_03 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_3 + iS_x2_3); + + // const auto Wx1_10 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_0 + iS_x2_0); + // const auto Wx1_11 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_1 + iS_x2_1); + // const auto Wx1_12 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_2 + iS_x2_2); + // const auto Wx1_13 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_3 + iS_x2_3); + + // const auto Wx1_20 = 
HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_0 + iS_x2_0); + // const auto Wx1_21 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_1 + iS_x2_1); + // const auto Wx1_22 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_2 + iS_x2_2); + // const auto Wx1_23 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_3 + iS_x2_3); + + // const auto Wx1_30 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_0 + iS_x2_0); + // const auto Wx1_31 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_1 + iS_x2_1); + // const auto Wx1_32 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_2 + iS_x2_2); + // const auto Wx1_33 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_3 + iS_x2_3); + + // // x2-components + // const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_0 - iS_x2_0); + // const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_1 - iS_x2_1); + // const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_2 - iS_x2_2); + // const auto Wx2_03 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_3 - iS_x2_3); + + // const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_0 - iS_x2_0); + // const auto Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_1 - iS_x2_1); + // const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_2 - iS_x2_2); + // const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_3 - iS_x2_3); + + // const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_0 - iS_x2_0); + // const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_1 - iS_x2_1); + // const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_2 - iS_x2_2); + // const auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_3 - iS_x2_3); + + // const auto Wx2_30 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_0 - iS_x2_0); + // const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_1 - iS_x2_1); + // const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_2 - iS_x2_2); + // const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_3 - iS_x2_3); + + // // x3-components + // const auto Wx3_00 = THIRD * (fS_x2_0 * (HALF * iS_x1_0 + fS_x1_0) + + // iS_x2_0 * (HALF * fS_x1_0 + iS_x1_0)); + // const auto Wx3_01 = THIRD * (fS_x2_1 * (HALF * 
iS_x1_0 + fS_x1_0) + + // iS_x2_1 * (HALF * fS_x1_0 + iS_x1_0)); + // const auto Wx3_02 = THIRD * (fS_x2_2 * (HALF * iS_x1_0 + fS_x1_0) + + // iS_x2_2 * (HALF * fS_x1_0 + iS_x1_0)); + // const auto Wx3_03 = THIRD * (fS_x2_3 * (HALF * iS_x1_0 + fS_x1_0) + + // iS_x2_3 * (HALF * fS_x1_0 + iS_x1_0)); + + // const auto Wx3_10 = THIRD * (fS_x2_0 * (HALF * iS_x1_1 + fS_x1_1) + + // iS_x2_0 * (HALF * fS_x1_1 + iS_x1_1)); + // const auto Wx3_11 = THIRD * (fS_x2_1 * (HALF * iS_x1_1 + fS_x1_1) + + // iS_x2_1 * (HALF * fS_x1_1 + iS_x1_1)); + // const auto Wx3_12 = THIRD * (fS_x2_2 * (HALF * iS_x1_1 + fS_x1_1) + + // iS_x2_2 * (HALF * fS_x1_1 + iS_x1_1)); + // const auto Wx3_13 = THIRD * (fS_x2_3 * (HALF * iS_x1_1 + fS_x1_1) + + // iS_x2_3 * (HALF * fS_x1_1 + iS_x1_1)); + + // const auto Wx3_20 = THIRD * (fS_x2_0 * (HALF * iS_x1_2 + fS_x1_2) + + // iS_x2_0 * (HALF * fS_x1_2 + iS_x1_2)); + // const auto Wx3_21 = THIRD * (fS_x2_1 * (HALF * iS_x1_2 + fS_x1_2) + + // iS_x2_1 * (HALF * fS_x1_2 + iS_x1_2)); + // const auto Wx3_22 = THIRD * (fS_x2_2 * (HALF * iS_x1_2 + fS_x1_2) + + // iS_x2_2 * (HALF * fS_x1_2 + iS_x1_2)); + // const auto Wx3_23 = THIRD * (fS_x2_3 * (HALF * iS_x1_2 + fS_x1_2) + + // iS_x2_3 * (HALF * fS_x1_2 + iS_x1_2)); + + // const auto Wx3_30 = THIRD * (fS_x2_0 * (HALF * iS_x1_3 + fS_x1_3) + + // iS_x2_0 * (HALF * fS_x1_3 + iS_x1_3)); + // const auto Wx3_31 = THIRD * (fS_x2_1 * (HALF * iS_x1_3 + fS_x1_3) + + // iS_x2_1 * (HALF * fS_x1_3 + iS_x1_3)); + // const auto Wx3_32 = THIRD * (fS_x2_2 * (HALF * iS_x1_3 + fS_x1_3) + + // iS_x2_2 * (HALF * fS_x1_3 + iS_x1_3)); + // const auto Wx3_33 = THIRD * (fS_x2_3 * (HALF * iS_x1_3 + fS_x1_3) + + // iS_x2_3 * (HALF * fS_x1_3 + iS_x1_3)); + + // // x1-component + // const auto jx1_00 = Wx1_00; + // const auto jx1_10 = jx1_00 + Wx1_10; + // const auto jx1_20 = jx1_10 + Wx1_20; + // const auto jx1_30 = jx1_20 + Wx1_30; + + // const auto jx1_01 = Wx1_01; + // const auto jx1_11 = jx1_01 + Wx1_11; + // const auto jx1_21 = jx1_11 
+ Wx1_21; + // const auto jx1_31 = jx1_21 + Wx1_31; + + // const auto jx1_02 = Wx1_02; + // const auto jx1_12 = jx1_02 + Wx1_12; + // const auto jx1_22 = jx1_12 + Wx1_22; + // const auto jx1_32 = jx1_22 + Wx1_32; + + // const auto jx1_03 = Wx1_03; + // const auto jx1_13 = jx1_03 + Wx1_13; + // const auto jx1_23 = jx1_13 + Wx1_23; + // const auto jx1_33 = jx1_23 + Wx1_33; + + // // y-component + // const auto jx2_00 = Wx2_00; + // const auto jx2_01 = jx2_00 + Wx2_01; + // const auto jx2_02 = jx2_01 + Wx2_02; + // const auto jx2_03 = jx2_02 + Wx2_03; + + // const auto jx2_10 = Wx2_10; + // const auto jx2_11 = jx2_10 + Wx2_11; + // const auto jx2_12 = jx2_11 + Wx2_12; + // const auto jx2_13 = jx2_12 + Wx2_13; + + // const auto jx2_20 = Wx2_20; + // const auto jx2_21 = jx2_20 + Wx2_21; + // const auto jx2_22 = jx2_21 + Wx2_22; + // const auto jx2_23 = jx2_22 + Wx2_23; + + // const auto jx2_30 = Wx2_30; + // const auto jx2_31 = jx2_30 + Wx2_31; + // const auto jx2_32 = jx2_31 + Wx2_32; + // const auto jx2_33 = jx2_32 + Wx2_33; + + // i1_min += N_GHOSTS; + // i2_min += N_GHOSTS; + + // // @TODO: not sure about the signs here + // const real_t Qdx1dt = -coeff * inv_dt; + // const real_t Qdx2dt = -coeff * inv_dt; + // const real_t QVx3 = coeff * vp[2]; + + // auto J_acc = J.access(); + + // // x1-currents + // J_acc(i1_min + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; + // J_acc(i1_min + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; + // J_acc(i1_min + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; + // J_acc(i1_min + 0, i2_min + 3, cur::jx1) += Qdx1dt * jx1_03; + + // J_acc(i1_min + 1, i2_min + 0, cur::jx1) += Qdx1dt * jx1_10; + // J_acc(i1_min + 1, i2_min + 1, cur::jx1) += Qdx1dt * jx1_11; + // J_acc(i1_min + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; + // J_acc(i1_min + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; + + // J_acc(i1_min + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; + // J_acc(i1_min + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; + // J_acc(i1_min + 2, 
i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; + // J_acc(i1_min + 2, i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; + + // J_acc(i1_min + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; + // J_acc(i1_min + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; + // J_acc(i1_min + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; + // J_acc(i1_min + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; + + // // x2-currents + // J_acc(i1_min + 0, i2_min + 0, cur::jx2) += Qdx2dt * jx2_00; + // J_acc(i1_min + 0, i2_min + 1, cur::jx2) += Qdx2dt * jx2_01; + // J_acc(i1_min + 0, i2_min + 2, cur::jx2) += Qdx2dt * jx2_02; + // J_acc(i1_min + 0, i2_min + 3, cur::jx2) += Qdx2dt * jx2_03; + + // J_acc(i1_min + 1, i2_min + 0, cur::jx2) += Qdx2dt * jx2_10; + // J_acc(i1_min + 1, i2_min + 1, cur::jx2) += Qdx2dt * jx2_11; + // J_acc(i1_min + 1, i2_min + 2, cur::jx2) += Qdx2dt * jx2_12; + // J_acc(i1_min + 1, i2_min + 3, cur::jx2) += Qdx2dt * jx2_13; + + // J_acc(i1_min + 2, i2_min + 0, cur::jx2) += Qdx2dt * jx2_20; + // J_acc(i1_min + 2, i2_min + 1, cur::jx2) += Qdx2dt * jx2_21; + // J_acc(i1_min + 2, i2_min + 2, cur::jx2) += Qdx2dt * jx2_22; + // J_acc(i1_min + 2, i2_min + 3, cur::jx2) += Qdx2dt * jx2_23; + + // J_acc(i1_min + 3, i2_min + 0, cur::jx2) += Qdx2dt * jx2_30; + // J_acc(i1_min + 3, i2_min + 1, cur::jx2) += Qdx2dt * jx2_31; + // J_acc(i1_min + 3, i2_min + 2, cur::jx2) += Qdx2dt * jx2_32; + // J_acc(i1_min + 3, i2_min + 3, cur::jx2) += Qdx2dt * jx2_33; + + // // x3-currents + // J_acc(i1_min + 0, i2_min + 0, cur::jx3) += QVx3 * Wx3_00; + // J_acc(i1_min + 0, i2_min + 1, cur::jx3) += QVx3 * Wx3_01; + // J_acc(i1_min + 0, i2_min + 2, cur::jx3) += QVx3 * Wx3_02; + // J_acc(i1_min + 0, i2_min + 3, cur::jx3) += QVx3 * Wx3_03; + + // J_acc(i1_min + 1, i2_min + 0, cur::jx3) += QVx3 * Wx3_10; + // J_acc(i1_min + 1, i2_min + 1, cur::jx3) += QVx3 * Wx3_11; + // J_acc(i1_min + 1, i2_min + 2, cur::jx3) += QVx3 * Wx3_12; + // J_acc(i1_min + 1, i2_min + 3, cur::jx3) += QVx3 * Wx3_13; + + // J_acc(i1_min + 2, i2_min + 
0, cur::jx3) += QVx3 * Wx3_20; + // J_acc(i1_min + 2, i2_min + 1, cur::jx3) += QVx3 * Wx3_21; + // J_acc(i1_min + 2, i2_min + 2, cur::jx3) += QVx3 * Wx3_22; + // J_acc(i1_min + 2, i2_min + 3, cur::jx3) += QVx3 * Wx3_23; + + // J_acc(i1_min + 3, i2_min + 0, cur::jx3) += QVx3 * Wx3_30; + // J_acc(i1_min + 3, i2_min + 1, cur::jx3) += QVx3 * Wx3_31; + // J_acc(i1_min + 3, i2_min + 2, cur::jx3) += QVx3 * Wx3_32; + // J_acc(i1_min + 3, i2_min + 3, cur::jx3) += QVx3 * Wx3_33; + + // } else if constexpr (D == Dim::_3D) { + // raise::KernelNotImplementedError(HERE); + // } // dimension + + } else if constexpr ((O > 1u) && (O < 6u)) { // shape function in dim1 -> always required - real_t iS_x1[O + 2], fS_x1[O + 2]; + real_t iS_x1[O + 2], fS_x1[O + 2]; // indices of the shape function - ncells_t i1_min; + int i1_min, i1_max; // call shape function prtl_shape::for_deposit(i1_prev(p), @@ -1903,31 +639,34 @@ namespace kernel { i1(p), static_cast(dx1(p)), i1_min, + i1_max, iS_x1, fS_x1); if constexpr (D == Dim::_1D) { // ToDo + raise::KernelNotImplementedError(HERE); } else if constexpr (D == Dim::_2D) { // shape function in dim1 -> always required - real_t iS_x2[O + 2], fS_x2[O + 2]; + real_t iS_x2[O + 2], fS_x2[O + 2]; // indices of the shape function - ncells_t i2_min; + int i2_min, i2_max; // call shape function prtl_shape::for_deposit(i2_prev(p), - static_cast(dx2_prev(p)), - i2(p), - static_cast(dx2(p)), - i2_min, - iS_x2, - fS_x2); + static_cast(dx2_prev(p)), + i2(p), + static_cast(dx2(p)), + i2_min, + i2_max, + iS_x2, + fS_x2); // define weight tensors - real_t Wx[O + 2][O + 2]; - real_t Wy[O + 2][O + 2]; - real_t Wz[O + 2][O + 2]; + real_t Wx1[O + 2][O + 2]; + real_t Wx2[O + 2][O + 2]; + real_t Wx3[O + 2][O + 2]; // Calculate weight function #pragma unroll @@ -1935,21 +674,28 @@ namespace kernel { #pragma unroll for (int j = 0; j < O + 2; ++j) { // Esirkepov 2001, Eq. 
38 - Wx[i][j] = (fS_x1[i] - iS_x1[i]) * - (iS_x2[j] + HALF * (fS_x2[j] - iS_x2[j])); + Wx1[i][j] = (fS_x1[i] - iS_x1[i]) * + (iS_x2[j] + HALF * (fS_x2[j] - iS_x2[j])); + + Wx2[i][j] = (fS_x2[j] - iS_x2[j]) * + (iS_x2[j] + HALF * (fS_x1[i] - iS_x1[i])); + + Wx3[i][j] = iS_x1[i] * iS_x2[j] + + HALF * (fS_x1[i] - fS_x1[i]) * iS_x2[j] + + HALF * iS_x1[i] * (fS_x2[j] - iS_x2[j]) + + THIRD * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] - iS_x2[j]); - Wy[i][j] = (fS_x2[j] - iS_x2[j]) * - (iS_x2[j] + HALF * (fS_x1[i] - iS_x1[i])); + // Wx1[i][j] = HALF * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] + iS_x2[j]); - Wz[i][j] = iS_x1[i] * iS_x2[j] + - HALF * (fS_x1[i] - fS_x1[i]) * iS_x2[j] + - HALF * iS_x1[i] * (fS_x2[j] - iS_x2[j]) + - THIRD * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] - iS_x2[j]); + // Wx2[i][j] = HALF * (fS_x1[i] + iS_x1[i]) * (fS_x2[j] - iS_x2[j]); + + // Wx3[i][j] = THIRD * (fS_x2[j] * (HALF * iS_x1[i] + fS_x2[j]) + + // iS_x2[j] * (HALF * fS_x2[j] + iS_x2[i])); } } // contribution within the shape function stencil - real_t jx[O + 2][O + 2], jy[O + 2][O + 2], jz[O + 2][O + 2]; + real_t jx1[O + 2][O + 2], jx2[O + 2][O + 2], jx3[O + 2][O + 2]; // prefactors for j update const real_t Qdx1dt = -coeff * inv_dt; @@ -1958,80 +704,95 @@ namespace kernel { // Calculate current contribution - // jx + // jx1 #pragma unroll for (int j = 0; j < O + 2; ++j) { - jx[0][j] = Wx[0][j]; + jx1[0][j] = Wx1[0][j]; } #pragma unroll for (int i = 1; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - jx[i][j] = jx[i - 1][j] + Wx[i][j]; + jx1[i][j] = jx1[i - 1][j] + Wx1[i][j]; } } - // jy + // jx2 #pragma unroll for (int i = 0; i < O + 2; ++i) { - jy[i][0] = Wy[i][0]; + jx2[i][0] = Wx2[i][0]; } #pragma unroll for (int j = 1; j < O + 2; ++j) { #pragma unroll for (int i = 0; i < O + 2; ++i) { - jy[i][j] = jy[i][j - 1] + Wy[i][j]; + jx2[i][j] = jx2[i][j - 1] + Wx2[i][j]; } } - // jz + // jx3 #pragma unroll for (int i = 0; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - 
jz[i][j] = Wz[i][j]; + jx3[i][j] = Wx3[i][j]; } } // account for ghost cells i1_min += N_GHOSTS; i2_min += N_GHOSTS; + i1_max += N_GHOSTS; + i2_max += N_GHOSTS; + + // get number of update indices for asymmetric movement + const int di_x1 = i1_max - i1_min; + const int di_x2 = i2_max - i2_min; /* Current update */ auto J_acc = J.access(); -#pragma unroll - for (int i = 0; i < O + 2; ++i) { -#pragma unroll - for (int j = 0; j < O + 2; ++j) { - J_acc(i1_min + i, i2_min + j, cur::jx1) += Qdx1dt * jx[i][j]; - J_acc(i1_min + i, i2_min + j, cur::jx2) += Qdx2dt * jy[i][j]; - J_acc(i1_min + i, i2_min + j, cur::jx3) += QVx3 * jz[i][j]; + for (int i = 0; i < di_x1; ++i) { + for (int j = 0; j < di_x2; ++j) { + J_acc(i1_min + i, i2_min + j, cur::jx1) += Qdx1dt * jx1[i][j]; + } + } + + for (int i = 0; i < di_x1; ++i) { + for (int j = 0; j < di_x2; ++j) { + J_acc(i1_min + i, i2_min + j, cur::jx2) += Qdx2dt * jx2[i][j]; + } + } + + for (int i = 0; i < di_x1; ++i) { + for (int j = 0; j < di_x2; ++j) { + J_acc(i1_min + i, i2_min + j, cur::jx3) += QVx3 * jx3[i][j]; } } } else if constexpr (D == Dim::_3D) { // shape function in dim2 - real_t iS_x2[O + 2], fS_x2[O + 2]; + real_t iS_x2[O + 2], fS_x2[O + 2]; // indices of the shape function - ncells_t i2_min; + int i2_min, i2_max; // call shape function prtl_shape::for_deposit(i2_prev(p), static_cast(dx2_prev(p)), i2(p), static_cast(dx2(p)), i2_min, + i2_max, iS_x2, fS_x2); // shape function in dim3 - real_t iS_x3[O + 2], fS_x3[O + 2]; + real_t iS_x3[O + 2], fS_x3[O + 2]; // indices of the shape function - ncells_t i3_min; + int i3_min, i3_max; // call shape function prtl_shape::for_deposit(i3_prev(p), @@ -2039,13 +800,14 @@ namespace kernel { i3(p), static_cast(dx3(p)), i3_min, + i3_max, iS_x3, fS_x3); // define weight tensors - real_t Wx[O + 1][O + 1][O + 1]; - real_t Wy[O + 1][O + 1][O + 1]; - real_t Wz[O + 1][O + 1][O + 1]; + real_t Wx1[O + 2][O + 2][O + 2]; + real_t Wx2[O + 2][O + 2][O + 2]; + real_t Wx3[O + 2][O + 2][O + 2]; // 
Calculate weight function #pragma unroll @@ -2055,24 +817,24 @@ namespace kernel { #pragma unroll for (int k = 0; k < O + 2; ++k) { // Esirkepov 2001, Eq. 31 - Wx[i][j][k] = THIRD * (fS_x1[i] - iS_x1[i]) * - ((iS_x2[j] * iS_x3[k] + fS_x2[j] * fS_x3[k]) + - HALF * (iS_x3[k] * fS_x2[j] + iS_x2[j] * fS_x3[k])); + Wx1[i][j][k] = THIRD * (fS_x1[i] - iS_x1[i]) * + ((iS_x2[j] * iS_x3[k] + fS_x2[j] * fS_x3[k]) + + HALF * (iS_x3[k] * fS_x2[j] + iS_x2[j] * fS_x3[k])); - Wy[i][j][k] = THIRD * (fS_x2[j] - iS_x2[j]) * - (iS_x1[i] * iS_x3[k] + fS_x1[i] * fS_x3[k] + - HALF * (iS_x3[k] * fS_x1[i] + iS_x1[i] * fS_x3[k])); + Wx2[i][j][k] = THIRD * (fS_x2[j] - iS_x2[j]) * + (iS_x1[i] * iS_x3[k] + fS_x1[i] * fS_x3[k] + + HALF * (iS_x3[k] * fS_x1[i] + iS_x1[i] * fS_x3[k])); - Wz[i][j][k] = THIRD * (fS_x3[k] - iS_x3[k]) * - (iS_x1[i] * iS_x2[j] + fS_x1[i] * fS_x2[j] + - HALF * (iS_x1[i] * fS_x2[j] + iS_x2[j] * fS_x1[i])); + Wx3[i][j][k] = THIRD * (fS_x3[k] - iS_x3[k]) * + (iS_x1[i] * iS_x2[j] + fS_x1[i] * fS_x2[j] + + HALF * (iS_x1[i] * fS_x2[j] + iS_x2[j] * fS_x1[i])); } } } // contribution within the shape function stencil - real_t jx[O + 2][O + 2][O + 2], jy[O + 2][O + 2][O + 2], - jz[O + 2][O + 2][O + 2]; + real_t jx1[O + 2][O + 2][O + 2], jx2[O + 2][O + 2][O + 2], + jx3[O + 2][O + 2][O + 2]; // prefactors to j update const real_t Qdxdt = coeff * inv_dt; @@ -2081,12 +843,12 @@ namespace kernel { // Calculate current contribution - // jx + // jx1 #pragma unroll for (int j = 0; j < O + 2; ++j) { #pragma unroll for (int k = 0; k < O + 2; ++k) { - jx[0][j][k] = -Qdxdt * Wx[0][j][k]; + jx1[0][j][k] = -Qdxdt * Wx1[0][j][k]; } } @@ -2096,17 +858,17 @@ namespace kernel { for (int j = 0; j < O + 2; ++j) { #pragma unroll for (int k = 0; j < O + 2; ++k) { - jx[i][j][k] = jx[i - 1][j][k] - Qdxdt * Wx[i][j][k]; + jx1[i][j][k] = jx1[i - 1][j][k] - Qdxdt * Wx1[i][j][k]; } } } - // jy + // jx2 #pragma unroll for (int i = 0; i < O + 2; ++i) { #pragma unroll for (int k = 0; k < O + 2; ++k) { - 
jy[i][0][k] = -Qdydt * Wy[i][0][k]; + jx2[i][0][k] = -Qdydt * Wx2[i][0][k]; } } @@ -2116,17 +878,17 @@ namespace kernel { for (int j = 1; j < O + 2; ++j) { #pragma unroll for (int k = 0; k < O + 2; ++k) { - jy[i][j][k] = jy[i][j - 1][k] - Qdydt * Wy[i][j][k]; + jx2[i][j][k] = jx2[i][j - 1][k] - Qdydt * Wx2[i][j][k]; } } } - // jz + // jx3 #pragma unroll for (int i = 0; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - jy[i][j][0] = -Qdydt * Wy[i][j][0]; + jx2[i][j][0] = -Qdydt * Wx2[i][j][0]; } } @@ -2136,7 +898,7 @@ namespace kernel { for (int j = 0; j < O + 2; ++j) { #pragma unroll for (int k = 1; k < O + 2; ++k) { - jz[i][j][k] = jz[i][j][k - 1] - Qdzdt * Wz[i][j][k]; + jx3[i][j][k] = jx3[i][j][k - 1] - Qdzdt * Wx3[i][j][k]; } } } @@ -2152,9 +914,9 @@ namespace kernel { for (int j = 0; j < O + 2; ++j) { #pragma unroll for (int k = 1; k < O + 2; ++k) { - J_acc(i1_min + i, i2_min + j, i3_min, cur::jx1) += jx[i][j][k]; - J_acc(i1_min + i, i2_min + j, i3_min, cur::jx2) += jy[i][j][k]; - J_acc(i1_min + i, i2_min + j, i3_min, cur::jx3) += jz[i][j][k]; + J_acc(i1_min + i, i2_min + j, i3_min, cur::jx1) += jx1[i][j][k]; + J_acc(i1_min + i, i2_min + j, i3_min, cur::jx2) += jx2[i][j][k]; + J_acc(i1_min + i, i2_min + j, i3_min, cur::jx3) += jx3[i][j][k]; } } } diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index c793ee678..776271c71 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -19,7 +19,25 @@ namespace prtl_shape { template Inline void order(const int& i, const real_t& di, int& i_min, real_t* S) { - if constexpr (O == 2u) { + if constexpr (O == 1u) { + // S(x) = 1 - |x| |x| < 1 + // 0.0 |x| ≥ 1 + if constexpr (not STAGGERED) { // compute at i positions + i_min = i; + S[0] = ONE - di; + S[1] = di; + } else { // compute at i + 1/2 positions + if (di < HALF) { + i_min = i - 1; + S[0] = HALF - di; + S[1] = ONE - S[0]; + } else { + i_min = i; + S[1] = static_cast(1.5) - di; + S[0] = ONE - 
S[1]; + } + } // staggered + } else if constexpr (O == 2u) { // 3/4 - |x|^2 |x| < 1/2 // S(x) = 1/2 * (3/2 - |x|)^2 1/2 ≤ |x| < 3/2 // 0.0 |x| ≥ 3/2 @@ -194,7 +212,7 @@ namespace prtl_shape { real_t& fS_3) { /* - The second order shape function per particle is a 4 element array + The second order shape function per particle is a 4 element array where the shape function contributes to only 3 elements. We need to find which indices are contributing to the shape function For this we first compute the indices of the particle position @@ -262,6 +280,7 @@ namespace prtl_shape { const int& i_fin, const real_t& di_fin, int& i_min, + int& i_max, real_t* iS, real_t* fS) { @@ -294,6 +313,7 @@ namespace prtl_shape { if (i_init_min < i_fin_min) { i_min = i_init_min; + i_max = i_fin_min + O + 1; #pragma unroll for (int j = 0; j < O; j++) { @@ -309,6 +329,7 @@ namespace prtl_shape { } else if (i_init_min > i_fin_min) { i_min = i_fin_min; + i_max = i_init_min + O + 1; iS[0] = ZERO; #pragma unroll @@ -324,6 +345,7 @@ namespace prtl_shape { } else { i_min = i_init_min; + i_max = i_min + O; #pragma unroll for (int j = 0; j < O; j++) { @@ -338,7 +360,6 @@ namespace prtl_shape { fS[O + 1] = ZERO; } } - } // namespace prtl_shape #endif // KERNELS_PARTICLE_SHAPES_HPP From 618578330414cd653cea41d99c4ecc89dcaca3ca Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 6 Aug 2025 14:39:00 -0500 Subject: [PATCH 054/154] bugfix --- src/kernels/particle_shapes.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 776271c71..fa3649db4 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -313,7 +313,7 @@ namespace prtl_shape { if (i_init_min < i_fin_min) { i_min = i_init_min; - i_max = i_fin_min + O + 1; + i_max = i_fin_min + O; #pragma unroll for (int j = 0; j < O; j++) { @@ -329,7 +329,7 @@ namespace prtl_shape { } else if (i_init_min > i_fin_min) { i_min = i_fin_min; - 
i_max = i_init_min + O + 1; + i_max = i_init_min + O; iS[0] = ZERO; #pragma unroll From d55240d8f8c21582c1d2b4fd5cd0e08f13947336 Mon Sep 17 00:00:00 2001 From: haykh Date: Wed, 6 Aug 2025 15:53:11 -0400 Subject: [PATCH 055/154] fixed old zigzag --- src/kernels/currents_deposit.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index b363b3819..bab0bd158 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -156,7 +156,6 @@ namespace kernel { /* Zig-zag deposit */ - const auto dxp_r_1 { static_cast(i1(p) == i1_prev(p)) * (dx1(p) + dx1_prev(p)) * static_cast(INV_2) }; @@ -247,7 +246,7 @@ namespace kernel { cur::jx3) += Fx3_1 * (ONE - Wx1_1) * (ONE - Wx2_1); J_acc(i1_prev(p) + N_GHOSTS + 1, i2_prev(p) + N_GHOSTS, - cur::jx3) += Fx3_1 * Wx1_2 * (ONE - Wx2_1); + cur::jx3) += Fx3_1 * Wx1_1 * (ONE - Wx2_1); J_acc(i1_prev(p) + N_GHOSTS, i2_prev(p) + N_GHOSTS + 1, cur::jx3) += Fx3_1 * (ONE - Wx1_1) * Wx2_1; From 87a80c2c61553ff877d01c48bd7185a707c3e67a Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 7 Aug 2025 14:57:21 -0400 Subject: [PATCH 056/154] esirkepov ranges fixed --- pgens/streaming/pgen.hpp | 11 + src/engines/srpic.hpp | 52 ++-- src/kernels/currents_deposit.hpp | 490 +++++++++++++++---------------- src/kernels/particle_shapes.hpp | 18 +- 4 files changed, 289 insertions(+), 282 deletions(-) diff --git a/pgens/streaming/pgen.hpp b/pgens/streaming/pgen.hpp index ee14712de..a08204ac1 100644 --- a/pgens/streaming/pgen.hpp +++ b/pgens/streaming/pgen.hpp @@ -103,6 +103,17 @@ namespace user { domain, injector, densities[n / 2]); + // for (auto& i : { n, n + 1 }) { + // auto& ux2 = domain.species[i].ux2; + // auto& ux3 = domain.species[i].ux3; + // Kokkos::parallel_for( + // "Remove_ux2ux3", + // domain.species[i].npart(), + // Lambda(index_t p) { + // ux2(p) = ZERO; + // ux3(p) = ZERO; + // }); + // } } } }; diff --git a/src/engines/srpic.hpp 
b/src/engines/srpic.hpp index 83c4e9bda..b63415a02 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -507,6 +507,26 @@ namespace ntt { } } + template + void deposit_with(const Particles& species, + const M& metric, + const scatter_ndfield_t& scatter_cur, + real_t dt) { + // clang-format off + Kokkos::parallel_for("CurrentsDeposit", + species.rangeActiveParticles(), + kernel::DepositCurrents_kernel( + scatter_cur, + species.i1, species.i2, species.i3, + species.i1_prev, species.i2_prev, species.i3_prev, + species.dx1, species.dx2, species.dx3, + species.dx1_prev, species.dx2_prev, species.dx3_prev, + species.ux1, species.ux2, species.ux3, + species.phi, species.weight, species.tag, + metric, (real_t)(species.charge()), dt)); + // clang-format on + } + void CurrentsDeposit(domain_t& domain) { auto scatter_cur = Kokkos::Experimental::create_scatter_view( domain.fields.cur); @@ -523,34 +543,12 @@ namespace ntt { species.npart(), (double)species.charge()), HERE); - if (shape_order == 1) { - // clang-format off - Kokkos::parallel_for("CurrentsDeposit", - species.rangeActiveParticles(), - kernel::DepositCurrents_kernel( - scatter_cur, - species.i1, species.i2, species.i3, - species.i1_prev, species.i2_prev, species.i3_prev, - species.dx1, species.dx2, species.dx3, - species.dx1_prev, species.dx2_prev, species.dx3_prev, - species.ux1, species.ux2, species.ux3, - species.phi, species.weight, species.tag, - domain.mesh.metric, (real_t)(species.charge()), dt)); - // clang-format on + if (shape_order == 0) { + deposit_with<0u>(species, domain.mesh.metric, scatter_cur, dt); + } else if (shape_order == 1) { + deposit_with<1u>(species, domain.mesh.metric, scatter_cur, dt); } else if (shape_order == 2) { - // clang-format off - Kokkos::parallel_for("CurrentsDeposit", - species.rangeActiveParticles(), - kernel::DepositCurrents_kernel( - scatter_cur, - species.i1, species.i2, species.i3, - species.i1_prev, species.i2_prev, species.i3_prev, - species.dx1, species.dx2, 
species.dx3, - species.dx1_prev, species.dx2_prev, species.dx3_prev, - species.ux1, species.ux2, species.ux3, - species.phi, species.weight, species.tag, - domain.mesh.metric, (real_t)(species.charge()), dt)); - // clang-format on + deposit_with<2u>(species, domain.mesh.metric, scatter_cur, dt); } else { raise::Error("Invalid shape order for current deposition", HERE); } diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index d8348ed02..516108004 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -166,7 +166,7 @@ namespace kernel { const real_t coeff { weight(p) * charge }; // ToDo: interpolation_order as parameter - if constexpr (O == 1u) { + if constexpr (O == 0u) { /* Zig-zag deposit */ @@ -191,7 +191,6 @@ namespace kernel { auto J_acc = J.access(); - // tuple_t dxp_r; if constexpr (D == Dim::_1D) { const real_t Fx2_1 { HALF * vp[1] * coeff }; const real_t Fx2_2 { HALF * vp[1] * coeff }; @@ -402,244 +401,7 @@ namespace kernel { cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; } } - - // } else if constexpr (O == 2u) { - // /* - // * Higher order charge conserving current deposition based on - // * Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract - // **/ - - // // iS -> shape function for init position - // // fS -> shape function for final position - - // // shape function at integer points (one coeff is always ZERO) - // int i1_min; - // real_t iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3; - // real_t fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3; - - // // clang-format off - // prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), - // i1(p), static_cast(dx1(p)), - // i1_min, - // iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, - // fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); - // // clang-format on - - // if constexpr (D == Dim::_1D) { - // raise::KernelNotImplementedError(HERE); - // } else if constexpr (D == Dim::_2D) { - - // // shape function at integer points (one coeff is always ZERO) - // int 
i2_min; - // real_t iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3; - // real_t fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3; - - // // clang-format off - // prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), - // i2(p), static_cast(dx2(p)), - // i2_min, - // iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, - // fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); - // // clang-format on - // // x1-components - // const auto Wx1_00 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_0 + iS_x2_0); - // const auto Wx1_01 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_1 + iS_x2_1); - // const auto Wx1_02 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_2 + iS_x2_2); - // const auto Wx1_03 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_3 + iS_x2_3); - - // const auto Wx1_10 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_0 + iS_x2_0); - // const auto Wx1_11 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_1 + iS_x2_1); - // const auto Wx1_12 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_2 + iS_x2_2); - // const auto Wx1_13 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_3 + iS_x2_3); - - // const auto Wx1_20 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_0 + iS_x2_0); - // const auto Wx1_21 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_1 + iS_x2_1); - // const auto Wx1_22 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_2 + iS_x2_2); - // const auto Wx1_23 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_3 + iS_x2_3); - - // const auto Wx1_30 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_0 + iS_x2_0); - // const auto Wx1_31 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_1 + iS_x2_1); - // const auto Wx1_32 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_2 + iS_x2_2); - // const auto Wx1_33 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_3 + iS_x2_3); - - // // x2-components - // const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_0 - iS_x2_0); - // const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_1 - iS_x2_1); - // const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_2 - iS_x2_2); - // const auto Wx2_03 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_3 - iS_x2_3); - - // const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_0 - iS_x2_0); - // const auto 
Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_1 - iS_x2_1); - // const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_2 - iS_x2_2); - // const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_3 - iS_x2_3); - - // const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_0 - iS_x2_0); - // const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_1 - iS_x2_1); - // const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_2 - iS_x2_2); - // const auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_3 - iS_x2_3); - - // const auto Wx2_30 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_0 - iS_x2_0); - // const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_1 - iS_x2_1); - // const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_2 - iS_x2_2); - // const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_3 - iS_x2_3); - - // // x3-components - // const auto Wx3_00 = THIRD * (fS_x2_0 * (HALF * iS_x1_0 + fS_x1_0) + - // iS_x2_0 * (HALF * fS_x1_0 + iS_x1_0)); - // const auto Wx3_01 = THIRD * (fS_x2_1 * (HALF * iS_x1_0 + fS_x1_0) + - // iS_x2_1 * (HALF * fS_x1_0 + iS_x1_0)); - // const auto Wx3_02 = THIRD * (fS_x2_2 * (HALF * iS_x1_0 + fS_x1_0) + - // iS_x2_2 * (HALF * fS_x1_0 + iS_x1_0)); - // const auto Wx3_03 = THIRD * (fS_x2_3 * (HALF * iS_x1_0 + fS_x1_0) + - // iS_x2_3 * (HALF * fS_x1_0 + iS_x1_0)); - - // const auto Wx3_10 = THIRD * (fS_x2_0 * (HALF * iS_x1_1 + fS_x1_1) + - // iS_x2_0 * (HALF * fS_x1_1 + iS_x1_1)); - // const auto Wx3_11 = THIRD * (fS_x2_1 * (HALF * iS_x1_1 + fS_x1_1) + - // iS_x2_1 * (HALF * fS_x1_1 + iS_x1_1)); - // const auto Wx3_12 = THIRD * (fS_x2_2 * (HALF * iS_x1_1 + fS_x1_1) + - // iS_x2_2 * (HALF * fS_x1_1 + iS_x1_1)); - // const auto Wx3_13 = THIRD * (fS_x2_3 * (HALF * iS_x1_1 + fS_x1_1) + - // iS_x2_3 * (HALF * fS_x1_1 + iS_x1_1)); - - // const auto Wx3_20 = THIRD * (fS_x2_0 * (HALF * iS_x1_2 + fS_x1_2) + - // iS_x2_0 * (HALF * fS_x1_2 + iS_x1_2)); - // const auto Wx3_21 = THIRD * (fS_x2_1 * (HALF * iS_x1_2 + fS_x1_2) + - // iS_x2_1 * (HALF * fS_x1_2 + 
iS_x1_2)); - // const auto Wx3_22 = THIRD * (fS_x2_2 * (HALF * iS_x1_2 + fS_x1_2) + - // iS_x2_2 * (HALF * fS_x1_2 + iS_x1_2)); - // const auto Wx3_23 = THIRD * (fS_x2_3 * (HALF * iS_x1_2 + fS_x1_2) + - // iS_x2_3 * (HALF * fS_x1_2 + iS_x1_2)); - - // const auto Wx3_30 = THIRD * (fS_x2_0 * (HALF * iS_x1_3 + fS_x1_3) + - // iS_x2_0 * (HALF * fS_x1_3 + iS_x1_3)); - // const auto Wx3_31 = THIRD * (fS_x2_1 * (HALF * iS_x1_3 + fS_x1_3) + - // iS_x2_1 * (HALF * fS_x1_3 + iS_x1_3)); - // const auto Wx3_32 = THIRD * (fS_x2_2 * (HALF * iS_x1_3 + fS_x1_3) + - // iS_x2_2 * (HALF * fS_x1_3 + iS_x1_3)); - // const auto Wx3_33 = THIRD * (fS_x2_3 * (HALF * iS_x1_3 + fS_x1_3) + - // iS_x2_3 * (HALF * fS_x1_3 + iS_x1_3)); - - // // x1-component - // const auto jx1_00 = Wx1_00; - // const auto jx1_10 = jx1_00 + Wx1_10; - // const auto jx1_20 = jx1_10 + Wx1_20; - // const auto jx1_30 = jx1_20 + Wx1_30; - - // const auto jx1_01 = Wx1_01; - // const auto jx1_11 = jx1_01 + Wx1_11; - // const auto jx1_21 = jx1_11 + Wx1_21; - // const auto jx1_31 = jx1_21 + Wx1_31; - - // const auto jx1_02 = Wx1_02; - // const auto jx1_12 = jx1_02 + Wx1_12; - // const auto jx1_22 = jx1_12 + Wx1_22; - // const auto jx1_32 = jx1_22 + Wx1_32; - - // const auto jx1_03 = Wx1_03; - // const auto jx1_13 = jx1_03 + Wx1_13; - // const auto jx1_23 = jx1_13 + Wx1_23; - // const auto jx1_33 = jx1_23 + Wx1_33; - - // // y-component - // const auto jx2_00 = Wx2_00; - // const auto jx2_01 = jx2_00 + Wx2_01; - // const auto jx2_02 = jx2_01 + Wx2_02; - // const auto jx2_03 = jx2_02 + Wx2_03; - - // const auto jx2_10 = Wx2_10; - // const auto jx2_11 = jx2_10 + Wx2_11; - // const auto jx2_12 = jx2_11 + Wx2_12; - // const auto jx2_13 = jx2_12 + Wx2_13; - - // const auto jx2_20 = Wx2_20; - // const auto jx2_21 = jx2_20 + Wx2_21; - // const auto jx2_22 = jx2_21 + Wx2_22; - // const auto jx2_23 = jx2_22 + Wx2_23; - - // const auto jx2_30 = Wx2_30; - // const auto jx2_31 = jx2_30 + Wx2_31; - // const auto jx2_32 = jx2_31 + 
Wx2_32; - // const auto jx2_33 = jx2_32 + Wx2_33; - - // i1_min += N_GHOSTS; - // i2_min += N_GHOSTS; - - // // @TODO: not sure about the signs here - // const real_t Qdx1dt = -coeff * inv_dt; - // const real_t Qdx2dt = -coeff * inv_dt; - // const real_t QVx3 = coeff * vp[2]; - - // auto J_acc = J.access(); - - // // x1-currents - // J_acc(i1_min + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; - // J_acc(i1_min + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; - // J_acc(i1_min + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; - // J_acc(i1_min + 0, i2_min + 3, cur::jx1) += Qdx1dt * jx1_03; - - // J_acc(i1_min + 1, i2_min + 0, cur::jx1) += Qdx1dt * jx1_10; - // J_acc(i1_min + 1, i2_min + 1, cur::jx1) += Qdx1dt * jx1_11; - // J_acc(i1_min + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; - // J_acc(i1_min + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; - - // J_acc(i1_min + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; - // J_acc(i1_min + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; - // J_acc(i1_min + 2, i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; - // J_acc(i1_min + 2, i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; - - // J_acc(i1_min + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; - // J_acc(i1_min + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; - // J_acc(i1_min + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; - // J_acc(i1_min + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; - - // // x2-currents - // J_acc(i1_min + 0, i2_min + 0, cur::jx2) += Qdx2dt * jx2_00; - // J_acc(i1_min + 0, i2_min + 1, cur::jx2) += Qdx2dt * jx2_01; - // J_acc(i1_min + 0, i2_min + 2, cur::jx2) += Qdx2dt * jx2_02; - // J_acc(i1_min + 0, i2_min + 3, cur::jx2) += Qdx2dt * jx2_03; - - // J_acc(i1_min + 1, i2_min + 0, cur::jx2) += Qdx2dt * jx2_10; - // J_acc(i1_min + 1, i2_min + 1, cur::jx2) += Qdx2dt * jx2_11; - // J_acc(i1_min + 1, i2_min + 2, cur::jx2) += Qdx2dt * jx2_12; - // J_acc(i1_min + 1, i2_min + 3, cur::jx2) += Qdx2dt * jx2_13; - - // J_acc(i1_min + 2, i2_min + 0, cur::jx2) += Qdx2dt * jx2_20; - // 
J_acc(i1_min + 2, i2_min + 1, cur::jx2) += Qdx2dt * jx2_21; - // J_acc(i1_min + 2, i2_min + 2, cur::jx2) += Qdx2dt * jx2_22; - // J_acc(i1_min + 2, i2_min + 3, cur::jx2) += Qdx2dt * jx2_23; - - // J_acc(i1_min + 3, i2_min + 0, cur::jx2) += Qdx2dt * jx2_30; - // J_acc(i1_min + 3, i2_min + 1, cur::jx2) += Qdx2dt * jx2_31; - // J_acc(i1_min + 3, i2_min + 2, cur::jx2) += Qdx2dt * jx2_32; - // J_acc(i1_min + 3, i2_min + 3, cur::jx2) += Qdx2dt * jx2_33; - - // // x3-currents - // J_acc(i1_min + 0, i2_min + 0, cur::jx3) += QVx3 * Wx3_00; - // J_acc(i1_min + 0, i2_min + 1, cur::jx3) += QVx3 * Wx3_01; - // J_acc(i1_min + 0, i2_min + 2, cur::jx3) += QVx3 * Wx3_02; - // J_acc(i1_min + 0, i2_min + 3, cur::jx3) += QVx3 * Wx3_03; - - // J_acc(i1_min + 1, i2_min + 0, cur::jx3) += QVx3 * Wx3_10; - // J_acc(i1_min + 1, i2_min + 1, cur::jx3) += QVx3 * Wx3_11; - // J_acc(i1_min + 1, i2_min + 2, cur::jx3) += QVx3 * Wx3_12; - // J_acc(i1_min + 1, i2_min + 3, cur::jx3) += QVx3 * Wx3_13; - - // J_acc(i1_min + 2, i2_min + 0, cur::jx3) += QVx3 * Wx3_20; - // J_acc(i1_min + 2, i2_min + 1, cur::jx3) += QVx3 * Wx3_21; - // J_acc(i1_min + 2, i2_min + 2, cur::jx3) += QVx3 * Wx3_22; - // J_acc(i1_min + 2, i2_min + 3, cur::jx3) += QVx3 * Wx3_23; - - // J_acc(i1_min + 3, i2_min + 0, cur::jx3) += QVx3 * Wx3_30; - // J_acc(i1_min + 3, i2_min + 1, cur::jx3) += QVx3 * Wx3_31; - // J_acc(i1_min + 3, i2_min + 2, cur::jx3) += QVx3 * Wx3_32; - // J_acc(i1_min + 3, i2_min + 3, cur::jx3) += QVx3 * Wx3_33; - - // } else if constexpr (D == Dim::_3D) { - // raise::KernelNotImplementedError(HERE); - // } // dimension - - } else if constexpr ((O > 1u) && (O < 6u)) { + } else if constexpr ((O >= 1u) and (O <= 5u)) { // shape function in dim1 -> always required real_t iS_x1[O + 2], fS_x1[O + 2]; @@ -769,20 +531,20 @@ namespace kernel { */ auto J_acc = J.access(); - for (int i = 0; i < di_x1; ++i) { - for (int j = 0; j < di_x2; ++j) { + for (int i = 0; i <= di_x1; ++i) { + for (int j = 0; j <= di_x2; ++j) { 
J_acc(i1_min + i, i2_min + j, cur::jx1) += Qdx1dt * jx1[i][j]; } } - for (int i = 0; i < di_x1; ++i) { - for (int j = 0; j < di_x2; ++j) { + for (int i = 0; i <= di_x1; ++i) { + for (int j = 0; j <= di_x2; ++j) { J_acc(i1_min + i, i2_min + j, cur::jx2) += Qdx2dt * jx2[i][j]; } } - for (int i = 0; i < di_x1; ++i) { - for (int j = 0; j < di_x2; ++j) { + for (int i = 0; i <= di_x1; ++i) { + for (int j = 0; j <= di_x2; ++j) { J_acc(i1_min + i, i2_min + j, cur::jx3) += QVx3 * jx3[i][j]; } } @@ -943,5 +705,241 @@ namespace kernel { } // namespace kernel #undef i_di_to_Xi +// +// } else if constexpr (O == 2u) { +// /* +// * Higher order charge conserving current deposition based on +// * Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract +// **/ + +// // iS -> shape function for init position +// // fS -> shape function for final position + +// // shape function at integer points (one coeff is always ZERO) +// int i1_min; +// real_t iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3; +// real_t fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3; + +// // clang-format off +// prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), +// i1(p), static_cast(dx1(p)), +// i1_min, +// iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, +// fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); +// // clang-format on + +// if constexpr (D == Dim::_1D) { +// raise::KernelNotImplementedError(HERE); +// } else if constexpr (D == Dim::_2D) { + +// // shape function at integer points (one coeff is always ZERO) +// int i2_min; +// real_t iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3; +// real_t fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3; + +// // clang-format off +// prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), +// i2(p), static_cast(dx2(p)), +// i2_min, +// iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, +// fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); +// // clang-format on +// // x1-components +// const auto Wx1_00 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_0 + iS_x2_0); +// const auto Wx1_01 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_1 
+ iS_x2_1); +// const auto Wx1_02 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_2 + iS_x2_2); +// const auto Wx1_03 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_3 + iS_x2_3); + +// const auto Wx1_10 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_0 + iS_x2_0); +// const auto Wx1_11 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_1 + iS_x2_1); +// const auto Wx1_12 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_2 + iS_x2_2); +// const auto Wx1_13 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_3 + iS_x2_3); + +// const auto Wx1_20 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_0 + iS_x2_0); +// const auto Wx1_21 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_1 + iS_x2_1); +// const auto Wx1_22 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_2 + iS_x2_2); +// const auto Wx1_23 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_3 + iS_x2_3); + +// const auto Wx1_30 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_0 + iS_x2_0); +// const auto Wx1_31 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_1 + iS_x2_1); +// const auto Wx1_32 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_2 + iS_x2_2); +// const auto Wx1_33 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_3 + iS_x2_3); + +// // x2-components +// const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_0 - iS_x2_0); +// const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_1 - iS_x2_1); +// const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_2 - iS_x2_2); +// const auto Wx2_03 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_3 - iS_x2_3); + +// const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_0 - iS_x2_0); +// const auto Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_1 - iS_x2_1); +// const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_2 - iS_x2_2); +// const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_3 - iS_x2_3); + +// const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_0 - iS_x2_0); +// const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_1 - iS_x2_1); +// const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_2 - iS_x2_2); +// const auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_3 - iS_x2_3); + +// const auto Wx2_30 = HALF 
* (fS_x1_3 + iS_x1_3) * (fS_x2_0 - iS_x2_0); +// const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_1 - iS_x2_1); +// const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_2 - iS_x2_2); +// const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_3 - iS_x2_3); + +// // x3-components +// const auto Wx3_00 = THIRD * (fS_x2_0 * (HALF * iS_x1_0 + fS_x1_0) + +// iS_x2_0 * (HALF * fS_x1_0 + iS_x1_0)); +// const auto Wx3_01 = THIRD * (fS_x2_1 * (HALF * iS_x1_0 + fS_x1_0) + +// iS_x2_1 * (HALF * fS_x1_0 + iS_x1_0)); +// const auto Wx3_02 = THIRD * (fS_x2_2 * (HALF * iS_x1_0 + fS_x1_0) + +// iS_x2_2 * (HALF * fS_x1_0 + iS_x1_0)); +// const auto Wx3_03 = THIRD * (fS_x2_3 * (HALF * iS_x1_0 + fS_x1_0) + +// iS_x2_3 * (HALF * fS_x1_0 + iS_x1_0)); + +// const auto Wx3_10 = THIRD * (fS_x2_0 * (HALF * iS_x1_1 + fS_x1_1) + +// iS_x2_0 * (HALF * fS_x1_1 + iS_x1_1)); +// const auto Wx3_11 = THIRD * (fS_x2_1 * (HALF * iS_x1_1 + fS_x1_1) + +// iS_x2_1 * (HALF * fS_x1_1 + iS_x1_1)); +// const auto Wx3_12 = THIRD * (fS_x2_2 * (HALF * iS_x1_1 + fS_x1_1) + +// iS_x2_2 * (HALF * fS_x1_1 + iS_x1_1)); +// const auto Wx3_13 = THIRD * (fS_x2_3 * (HALF * iS_x1_1 + fS_x1_1) + +// iS_x2_3 * (HALF * fS_x1_1 + iS_x1_1)); + +// const auto Wx3_20 = THIRD * (fS_x2_0 * (HALF * iS_x1_2 + fS_x1_2) + +// iS_x2_0 * (HALF * fS_x1_2 + iS_x1_2)); +// const auto Wx3_21 = THIRD * (fS_x2_1 * (HALF * iS_x1_2 + fS_x1_2) + +// iS_x2_1 * (HALF * fS_x1_2 + iS_x1_2)); +// const auto Wx3_22 = THIRD * (fS_x2_2 * (HALF * iS_x1_2 + fS_x1_2) + +// iS_x2_2 * (HALF * fS_x1_2 + iS_x1_2)); +// const auto Wx3_23 = THIRD * (fS_x2_3 * (HALF * iS_x1_2 + fS_x1_2) + +// iS_x2_3 * (HALF * fS_x1_2 + iS_x1_2)); + +// const auto Wx3_30 = THIRD * (fS_x2_0 * (HALF * iS_x1_3 + fS_x1_3) + +// iS_x2_0 * (HALF * fS_x1_3 + iS_x1_3)); +// const auto Wx3_31 = THIRD * (fS_x2_1 * (HALF * iS_x1_3 + fS_x1_3) + +// iS_x2_1 * (HALF * fS_x1_3 + iS_x1_3)); +// const auto Wx3_32 = THIRD * (fS_x2_2 * (HALF * iS_x1_3 + fS_x1_3) + +// iS_x2_2 * (HALF * 
fS_x1_3 + iS_x1_3)); +// const auto Wx3_33 = THIRD * (fS_x2_3 * (HALF * iS_x1_3 + fS_x1_3) + +// iS_x2_3 * (HALF * fS_x1_3 + iS_x1_3)); + +// // x1-component +// const auto jx1_00 = Wx1_00; +// const auto jx1_10 = jx1_00 + Wx1_10; +// const auto jx1_20 = jx1_10 + Wx1_20; +// const auto jx1_30 = jx1_20 + Wx1_30; + +// const auto jx1_01 = Wx1_01; +// const auto jx1_11 = jx1_01 + Wx1_11; +// const auto jx1_21 = jx1_11 + Wx1_21; +// const auto jx1_31 = jx1_21 + Wx1_31; + +// const auto jx1_02 = Wx1_02; +// const auto jx1_12 = jx1_02 + Wx1_12; +// const auto jx1_22 = jx1_12 + Wx1_22; +// const auto jx1_32 = jx1_22 + Wx1_32; + +// const auto jx1_03 = Wx1_03; +// const auto jx1_13 = jx1_03 + Wx1_13; +// const auto jx1_23 = jx1_13 + Wx1_23; +// const auto jx1_33 = jx1_23 + Wx1_33; + +// // y-component +// const auto jx2_00 = Wx2_00; +// const auto jx2_01 = jx2_00 + Wx2_01; +// const auto jx2_02 = jx2_01 + Wx2_02; +// const auto jx2_03 = jx2_02 + Wx2_03; + +// const auto jx2_10 = Wx2_10; +// const auto jx2_11 = jx2_10 + Wx2_11; +// const auto jx2_12 = jx2_11 + Wx2_12; +// const auto jx2_13 = jx2_12 + Wx2_13; + +// const auto jx2_20 = Wx2_20; +// const auto jx2_21 = jx2_20 + Wx2_21; +// const auto jx2_22 = jx2_21 + Wx2_22; +// const auto jx2_23 = jx2_22 + Wx2_23; + +// const auto jx2_30 = Wx2_30; +// const auto jx2_31 = jx2_30 + Wx2_31; +// const auto jx2_32 = jx2_31 + Wx2_32; +// const auto jx2_33 = jx2_32 + Wx2_33; + +// i1_min += N_GHOSTS; +// i2_min += N_GHOSTS; + +// // @TODO: not sure about the signs here +// const real_t Qdx1dt = -coeff * inv_dt; +// const real_t Qdx2dt = -coeff * inv_dt; +// const real_t QVx3 = coeff * vp[2]; + +// auto J_acc = J.access(); + +// // x1-currents +// J_acc(i1_min + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; +// J_acc(i1_min + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; +// J_acc(i1_min + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; +// J_acc(i1_min + 0, i2_min + 3, cur::jx1) += Qdx1dt * jx1_03; + +// J_acc(i1_min + 1, i2_min + 0, 
cur::jx1) += Qdx1dt * jx1_10; +// J_acc(i1_min + 1, i2_min + 1, cur::jx1) += Qdx1dt * jx1_11; +// J_acc(i1_min + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; +// J_acc(i1_min + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; + +// J_acc(i1_min + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; +// J_acc(i1_min + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; +// J_acc(i1_min + 2, i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; +// J_acc(i1_min + 2, i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; + +// J_acc(i1_min + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; +// J_acc(i1_min + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; +// J_acc(i1_min + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; +// J_acc(i1_min + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; + +// // x2-currents +// J_acc(i1_min + 0, i2_min + 0, cur::jx2) += Qdx2dt * jx2_00; +// J_acc(i1_min + 0, i2_min + 1, cur::jx2) += Qdx2dt * jx2_01; +// J_acc(i1_min + 0, i2_min + 2, cur::jx2) += Qdx2dt * jx2_02; +// J_acc(i1_min + 0, i2_min + 3, cur::jx2) += Qdx2dt * jx2_03; + +// J_acc(i1_min + 1, i2_min + 0, cur::jx2) += Qdx2dt * jx2_10; +// J_acc(i1_min + 1, i2_min + 1, cur::jx2) += Qdx2dt * jx2_11; +// J_acc(i1_min + 1, i2_min + 2, cur::jx2) += Qdx2dt * jx2_12; +// J_acc(i1_min + 1, i2_min + 3, cur::jx2) += Qdx2dt * jx2_13; + +// J_acc(i1_min + 2, i2_min + 0, cur::jx2) += Qdx2dt * jx2_20; +// J_acc(i1_min + 2, i2_min + 1, cur::jx2) += Qdx2dt * jx2_21; +// J_acc(i1_min + 2, i2_min + 2, cur::jx2) += Qdx2dt * jx2_22; +// J_acc(i1_min + 2, i2_min + 3, cur::jx2) += Qdx2dt * jx2_23; + +// J_acc(i1_min + 3, i2_min + 0, cur::jx2) += Qdx2dt * jx2_30; +// J_acc(i1_min + 3, i2_min + 1, cur::jx2) += Qdx2dt * jx2_31; +// J_acc(i1_min + 3, i2_min + 2, cur::jx2) += Qdx2dt * jx2_32; +// J_acc(i1_min + 3, i2_min + 3, cur::jx2) += Qdx2dt * jx2_33; + +// // x3-currents +// J_acc(i1_min + 0, i2_min + 0, cur::jx3) += QVx3 * Wx3_00; +// J_acc(i1_min + 0, i2_min + 1, cur::jx3) += QVx3 * Wx3_01; +// J_acc(i1_min + 0, i2_min + 2, cur::jx3) += QVx3 * Wx3_02; +// 
J_acc(i1_min + 0, i2_min + 3, cur::jx3) += QVx3 * Wx3_03; + +// J_acc(i1_min + 1, i2_min + 0, cur::jx3) += QVx3 * Wx3_10; +// J_acc(i1_min + 1, i2_min + 1, cur::jx3) += QVx3 * Wx3_11; +// J_acc(i1_min + 1, i2_min + 2, cur::jx3) += QVx3 * Wx3_12; +// J_acc(i1_min + 1, i2_min + 3, cur::jx3) += QVx3 * Wx3_13; + +// J_acc(i1_min + 2, i2_min + 0, cur::jx3) += QVx3 * Wx3_20; +// J_acc(i1_min + 2, i2_min + 1, cur::jx3) += QVx3 * Wx3_21; +// J_acc(i1_min + 2, i2_min + 2, cur::jx3) += QVx3 * Wx3_22; +// J_acc(i1_min + 2, i2_min + 3, cur::jx3) += QVx3 * Wx3_23; + +// J_acc(i1_min + 3, i2_min + 0, cur::jx3) += QVx3 * Wx3_30; +// J_acc(i1_min + 3, i2_min + 1, cur::jx3) += QVx3 * Wx3_31; +// J_acc(i1_min + 3, i2_min + 2, cur::jx3) += QVx3 * Wx3_32; +// J_acc(i1_min + 3, i2_min + 3, cur::jx3) += QVx3 * Wx3_33; + +// } else if constexpr (D == Dim::_3D) { +// raise::KernelNotImplementedError(HERE); +// } // dimension #endif // KERNELS_CURRENTS_DEPOSIT_HPP diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index fa3649db4..7d626c9d6 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -18,7 +18,7 @@ namespace prtl_shape { template - Inline void order(const int& i, const real_t& di, int& i_min, real_t* S) { + Inline void order(const int& i, const real_t& di, int& i_min, real_t S[O + 1]) { if constexpr (O == 1u) { // S(x) = 1 - |x| |x| < 1 // 0.0 |x| ≥ 1 @@ -281,8 +281,8 @@ namespace prtl_shape { const real_t& di_fin, int& i_min, int& i_max, - real_t* iS, - real_t* fS) { + real_t iS[O + 2], + real_t fS[O + 2]) { /* The N-th order shape function per particle is a N+2 element array @@ -316,14 +316,14 @@ namespace prtl_shape { i_max = i_fin_min + O; #pragma unroll - for (int j = 0; j < O; j++) { + for (int j = 0; j < O + 1; j++) { iS[j] = iS_[j]; } iS[O + 1] = ZERO; fS[0] = ZERO; #pragma unroll - for (int j = 0; j < O; j++) { + for (int j = 0; j < O + 1; j++) { fS[j + 1] = fS_[j]; } @@ -333,12 +333,12 @@ namespace prtl_shape { 
iS[0] = ZERO; #pragma unroll - for (int j = 0; j < O; j++) { + for (int j = 0; j < O + 1; j++) { iS[j + 1] = iS_[j]; } #pragma unroll - for (int j = 0; j < O; j++) { + for (int j = 0; j < O + 1; j++) { fS[j] = fS_[j]; } fS[O + 1] = ZERO; @@ -348,13 +348,13 @@ namespace prtl_shape { i_max = i_min + O; #pragma unroll - for (int j = 0; j < O; j++) { + for (int j = 0; j < O + 1; j++) { iS[j] = iS_[j]; } iS[O + 1] = ZERO; #pragma unroll - for (int j = 0; j < O; j++) { + for (int j = 0; j < O + 1; j++) { fS[j] = fS_[j]; } fS[O + 1] = ZERO; From 877e9f54b4395582e744817bf7c42de615a0d205 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Tue, 12 Aug 2025 18:17:30 -0500 Subject: [PATCH 057/154] generalized field interpolation to arbitrary order (wip) --- src/kernels/particle_pusher_sr.hpp | 1308 +++++++++++----------------- 1 file changed, 522 insertions(+), 786 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index dff92677e..873f488c0 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -22,6 +22,8 @@ #include "utils/error.h" #include "utils/numeric.h" +#include "particle_shapes.hpp" + #if defined(MPI_ENABLED) #include "arch/mpi_tags.h" #endif @@ -30,7 +32,9 @@ /* Local macros */ /* -------------------------------------------------------------------------- */ #define from_Xi_to_i(XI, I) \ - { I = static_cast((XI + 1)) - 1; } + { \ + I = static_cast((XI + 1)) - 1; \ + } #define from_Xi_to_i_di(XI, I, DI) \ { \ @@ -473,9 +477,9 @@ namespace kernel::sr { vec_t ei_Cart_rad { ZERO }, bi_Cart_rad { ZERO }; bool is_gca { false }; - // getInterpFlds(p, ei, bi); - // ToDo: Better way to call this - // getInterpFlds2nd(p, ei, bi); + // field interpolation 1st-6th order + //getInterpFlds(p, ei, bi); + for (auto i { 0u }; i < 3u; ++i) { ei[i] = ZERO; bi[i] = ZERO; @@ -834,791 +838,523 @@ namespace kernel::sr { Inline void getInterpFlds(index_t& p, vec_t& e0, vec_t& b0) const { - if constexpr (D == 
Dim::_1D) { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - - // direct interpolation - Arno - int indx = static_cast(dx1_ + HALF); - - // first order - real_t c0, c1; - - real_t ponpmx = ONE - dx1_; - real_t ponppx = dx1_; - - real_t pondmx = static_cast(indx + ONE) - (dx1_ + HALF); - real_t pondpx = ONE - pondmx; - - // Ex1 - // Interpolate --- (dual) - c0 = EB(i - 1 + indx, em::ex1); - c1 = EB(i + indx, em::ex1); - e0[0] = c0 * pondmx + c1 * pondpx; - // Ex2 - // Interpolate --- (primal) - c0 = EB(i, em::ex2); - c1 = EB(i + 1, em::ex2); - e0[1] = c0 * ponpmx + c1 * ponppx; - // Ex3 - // Interpolate --- (primal) - c0 = EB(i, em::ex3); - c1 = EB(i + 1, em::ex3); - e0[2] = c0 * ponpmx + c1 * ponppx; - // Bx1 - // Interpolate --- (primal) - c0 = EB(i, em::bx1); - c1 = EB(i + 1, em::bx1); - b0[0] = c0 * ponpmx + c1 * ponppx; - // Bx2 - // Interpolate --- (dual) - c0 = EB(i - 1 + indx, em::bx2); - c1 = EB(i + indx, em::bx2); - b0[1] = c0 * pondmx + c1 * pondpx; - // Bx3 - // Interpolate --- (dual) - c0 = EB(i - 1 + indx, em::bx3); - c1 = EB(i + indx, em::bx3); - b0[2] = c0 * pondmx + c1 * pondpx; - } else if constexpr (D == Dim::_2D) { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const int j { i2(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - const auto dx2_ { static_cast(dx2(p)) }; - - // direct interpolation - Arno - int indx = static_cast(dx1_ + HALF); - int indy = static_cast(dx2_ + HALF); - - // first order - real_t c000, c100, c010, c110, c00, c10; - - real_t ponpmx = ONE - dx1_; - real_t ponppx = dx1_; - real_t ponpmy = ONE - dx2_; - real_t ponppy = dx2_; - - real_t pondmx = static_cast(indx + ONE) - (dx1_ + HALF); - real_t pondpx = ONE - pondmx; - real_t pondmy = static_cast(indy + ONE) - (dx2_ + HALF); - real_t pondpy = ONE - pondmy; - - // Ex1 - // Interpolate --- (dual, primal) - c000 = EB(i - 1 + indx, j, em::ex1); - c100 = EB(i + indx, j, em::ex1); - c010 = EB(i - 1 + indx, j 
+ 1, em::ex1); - c110 = EB(i + indx, j + 1, em::ex1); - c00 = c000 * pondmx + c100 * pondpx; - c10 = c010 * pondmx + c110 * pondpx; - e0[0] = c00 * ponpmy + c10 * ponppy; - // Ex2 - // Interpolate -- (primal, dual) - c000 = EB(i, j - 1 + indy, em::ex2); - c100 = EB(i + 1, j - 1 + indy, em::ex2); - c010 = EB(i, j + indy, em::ex2); - c110 = EB(i + 1, j + indy, em::ex2); - c00 = c000 * ponpmx + c100 * ponppx; - c10 = c010 * ponpmx + c110 * ponppx; - e0[1] = c00 * pondmy + c10 * pondpy; - // Ex3 - // Interpolate -- (primal, primal) - c000 = EB(i, j, em::ex3); - c100 = EB(i + 1, j, em::ex3); - c010 = EB(i, j + 1, em::ex3); - c110 = EB(i + 1, j + 1, em::ex3); - c00 = c000 * ponpmx + c100 * ponppx; - c10 = c010 * ponpmx + c110 * ponppx; - e0[2] = c00 * ponpmy + c10 * ponppy; - - // Bx1 - // Interpolate -- (primal, dual) - c000 = EB(i, j - 1 + indy, em::bx1); - c100 = EB(i + 1, j - 1 + indy, em::bx1); - c010 = EB(i, j + indy, em::bx1); - c110 = EB(i + 1, j + indy, em::bx1); - c00 = c000 * ponpmx + c100 * ponppx; - c10 = c010 * ponpmx + c110 * ponppx; - b0[0] = c00 * pondmy + c10 * pondpy; - // Bx2 - // Interpolate -- (dual, primal) - c000 = EB(i - 1 + indx, j, em::bx2); - c100 = EB(i + indx, j, em::bx2); - c010 = EB(i - 1 + indx, j + 1, em::bx2); - c110 = EB(i + indx, j + 1, em::bx2); - c00 = c000 * pondmx + c100 * pondpx; - c10 = c010 * pondmx + c110 * pondpx; - b0[1] = c00 * ponpmy + c10 * ponppy; - // Bx3 - // Interpolate -- (dual, dual) - c000 = EB(i - 1 + indx, j - 1 + indy, em::bx3); - c100 = EB(i + indx, j - 1 + indy, em::bx3); - c010 = EB(i - 1 + indx, j + indy, em::bx3); - c110 = EB(i + indx, j + indy, em::bx3); - c00 = c000 * pondmx + c100 * pondpx; - c10 = c010 * pondmx + c110 * pondpx; - b0[2] = c00 * pondmy + c10 * pondpy; - } else if constexpr (D == Dim::_3D) { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const int j { i2(p) + static_cast(N_GHOSTS) }; - const int k { i3(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - const 
auto dx2_ { static_cast(dx2(p)) }; - const auto dx3_ { static_cast(dx3(p)) }; - - // direct interpolation - Arno - int indx = static_cast(dx1_ + HALF); - int indy = static_cast(dx2_ + HALF); - int indz = static_cast(dx3_ + HALF); - - // first order - real_t c000, c100, c010, c110, c001, c101, c011, c111, c00, c10, c01, - c11, c0, c1; - - real_t ponpmx = ONE - dx1_; - real_t ponppx = dx1_; - real_t ponpmy = ONE - dx2_; - real_t ponppy = dx2_; - real_t ponpmz = ONE - dx3_; - real_t ponppz = dx3_; - - real_t pondmx = static_cast(indx + ONE) - (dx1_ + HALF); - real_t pondpx = ONE - pondmx; - real_t pondmy = static_cast(indy + ONE) - (dx2_ + HALF); - real_t pondpy = ONE - pondmy; - real_t pondmz = static_cast(indz + ONE) - (dx3_ + HALF); - real_t pondpz = ONE - pondmz; - - // Ex1 - // Interpolate --- (dual, primal, primal) - c000 = EB(i - 1 + indx, j, k, em::ex1); - c100 = EB(i + indx, j, k, em::ex1); - c010 = EB(i - 1 + indx, j + 1, k, em::ex1); - c110 = EB(i + indx, j + 1, k, em::ex1); - c001 = EB(i - 1 + indx, j, k + 1, em::ex1); - c101 = EB(i + indx, j, k + 1, em::ex1); - c011 = EB(i - 1 + indx, j + 1, k + 1, em::ex1); - c111 = EB(i + indx, j + 1, k + 1, em::ex1); - c00 = c000 * pondmx + c100 * pondpx; - c10 = c010 * pondmx + c110 * pondpx; - c0 = c00 * ponpmy + c10 * ponppy; - c01 = c001 * pondmx + c101 * pondpx; - c11 = c011 * pondmx + c111 * pondpx; - c1 = c01 * ponpmy + c11 * ponppy; - e0[0] = c0 * ponpmz + c1 * ponppz; - // Ex2 - // Interpolate -- (primal, dual, primal) - c000 = EB(i, j - 1 + indy, k, em::ex2); - c100 = EB(i + 1, j - 1 + indy, k, em::ex2); - c010 = EB(i, j + indy, k, em::ex2); - c110 = EB(i + 1, j + indy, k, em::ex2); - c001 = EB(i, j - 1 + indy, k + 1, em::ex2); - c101 = EB(i + 1, j - 1 + indy, k + 1, em::ex2); - c011 = EB(i, j + indy, k + 1, em::ex2); - c111 = EB(i + 1, j + indy, k + 1, em::ex2); - c00 = c000 * ponpmx + c100 * ponppx; - c10 = c010 * ponpmx + c110 * ponppx; - c0 = c00 * pondmy + c10 * pondpy; - c01 = c001 * ponpmx + c101 * 
ponppx; - c11 = c011 * ponpmx + c111 * ponppx; - c1 = c01 * pondmy + c11 * pondpy; - e0[1] = c0 * ponpmz + c1 * ponppz; - // Ex3 - // Interpolate -- (primal, primal, dual) - c000 = EB(i, j, k - 1 + indz, em::ex3); - c100 = EB(i + 1, j, k - 1 + indz, em::ex3); - c010 = EB(i, j + 1, k - 1 + indz, em::ex3); - c110 = EB(i + 1, j + 1, k - 1 + indz, em::ex3); - c001 = EB(i, j, k + indz, em::ex3); - c101 = EB(i + 1, j, k + indz, em::ex3); - c011 = EB(i, j + 1, k + indz, em::ex3); - c111 = EB(i + 1, j + 1, k + indz, em::ex3); - c00 = c000 * ponpmx + c100 * ponppx; - c10 = c010 * ponpmx + c110 * ponppx; - c0 = c00 * ponpmy + c10 * ponppy; - c01 = c001 * ponpmx + c101 * ponppx; - c11 = c011 * ponpmx + c111 * ponppx; - c1 = c01 * ponpmy + c11 * ponppy; - e0[2] = c0 * pondmz + c1 * pondpz; - - // Bx1 - // Interpolate -- (primal, dual, dual) - c000 = EB(i, j - 1 + indy, k - 1 + indz, em::bx1); - c100 = EB(i + 1, j - 1 + indy, k - 1 + indz, em::bx1); - c010 = EB(i, j + indy, k - 1 + indz, em::bx1); - c110 = EB(i + 1, j + indy, k - 1 + indz, em::bx1); - c001 = EB(i, j - 1 + indy, k + indz, em::bx1); - c101 = EB(i + 1, j - 1 + indy, k + indz, em::bx1); - c011 = EB(i, j + indy, k + indz, em::bx1); - c111 = EB(i + 1, j + indy, k + indz, em::bx1); - c00 = c000 * ponpmx + c100 * ponppx; - c10 = c010 * ponpmx + c110 * ponppx; - c0 = c00 * pondmy + c10 * pondpy; - c01 = c001 * ponpmx + c101 * ponppx; - c11 = c011 * ponpmx + c111 * ponppx; - c1 = c01 * pondmy + c11 * pondpy; - b0[0] = c0 * pondmz + c1 * pondpz; - // Bx2 - // Interpolate -- (dual, primal, dual) - c000 = EB(i - 1 + indx, j, k - 1 + indz, em::bx2); - c100 = EB(i + indx, j, k - 1 + indz, em::bx2); - c010 = EB(i - 1 + indx, j + 1, k - 1 + indz, em::bx2); - c110 = EB(i + indx, j + 1, k - 1 + indz, em::bx2); - c001 = EB(i - 1 + indx, j, k + indz, em::bx2); - c101 = EB(i + indx, j, k + indz, em::bx2); - c011 = EB(i - 1 + indx, j + 1, k + indz, em::bx2); - c111 = EB(i + indx, j + 1, k + indz, em::bx2); - c00 = c000 * pondmx + 
c100 * pondpx; - c10 = c010 * pondmx + c110 * pondpx; - c0 = c00 * ponpmy + c10 * ponppy; - c01 = c001 * pondmx + c101 * pondpx; - c11 = c011 * pondmx + c111 * pondpx; - c1 = c01 * ponpmy + c11 * ponppy; - b0[1] = c0 * pondmz + c1 * pondpz; - // Bx3 - // Interpolate -- (dual, dual, primal) - c000 = EB(i - 1 + indx, j - 1 + indy, k, em::bx3); - c100 = EB(i + indx, j - 1 + indy, k, em::bx3); - c010 = EB(i - 1 + indx, j + indy, k, em::bx3); - c110 = EB(i + indx, j + indy, k, em::bx3); - c001 = EB(i - 1 + indx, j - 1 + indy, k + 1, em::bx3); - c101 = EB(i + indx, j - 1 + indy, k + 1, em::bx3); - c011 = EB(i - 1 + indx, j + indy, k + 1, em::bx3); - c111 = EB(i + indx, j + indy, k + 1, em::bx3); - c00 = c000 * pondmx + c100 * pondpx; - c10 = c010 * pondmx + c110 * pondpx; - c0 = c00 * ponpmy + c10 * ponppy; - c01 = c001 * pondmx + c101 * pondpx; - c11 = c011 * pondmx + c111 * pondpx; - c1 = c01 * ponpmy + c11 * ponppy; - b0[2] = c0 * ponpmz + c1 * ponppz; - } - } - Inline void getInterpFlds2nd(index_t& p, - vec_t& e0, - vec_t& b0) const { - if constexpr (D == Dim::_1D) { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - - // direct interpolation of staggered grid - // primal = i+ind, dual = i - const int indx = static_cast(static_cast(dx1_ + HALF)); - - // Compute weights for second-order interpolation - // primal - const auto w0p = HALF * SQR(HALF - dx1_ + static_cast(indx)); - const auto w1p = THREE_FOURTHS - SQR(dx1_ - static_cast(indx)); - const auto w2p = ONE - w0p - w1p; - - // dual - const auto w0d = HALF * SQR(ONE - dx1_); - const auto w2d = HALF * SQR(dx1_); - const auto w1d = ONE - w0d - w2d; - - // Ex1 (dual grid) - const auto ex1_0 = EB(i - 1, em::ex1); - const auto ex1_1 = EB(i, em::ex1); - const auto ex1_2 = EB(i + 1, em::ex1); - e0[0] = ex1_0 * w0d + ex1_1 * w1d + ex1_2 * w2d; - - // Ex2 (primal grid) - const auto ex2_0 = EB(indx + i - 1, em::ex2); - const auto ex2_1 = EB(indx + i, em::ex2); - const auto ex2_2 = 
EB(indx + i + 1, em::ex2); - e0[1] = ex2_0 * w0p + ex2_1 * w1p + ex2_2 * w2p; - - // Ex3 (primal grid) - const auto ex3_0 = EB(indx + i - 1, em::ex3); - const auto ex3_1 = EB(indx + i, em::ex3); - const auto ex3_2 = EB(indx + i + 1, em::ex3); - e0[2] = ex3_0 * w0p + ex3_1 * w1p + ex3_2 * w2p; - - // Bx1 (primal grid) - const auto bx1_0 = EB(indx + i - 1, em::bx1); - const auto bx1_1 = EB(indx + i, em::bx1); - const auto bx1_2 = EB(indx + i + 1, em::bx1); - b0[0] = bx1_0 * w0p + bx1_1 * w1p + bx1_2 * w2p; - - // Bx2 (dual grid) - const auto bx2_0 = EB(i - 1, em::bx2); - const auto bx2_1 = EB(i, em::bx2); - const auto bx2_2 = EB(i + 1, em::bx2); - b0[1] = bx2_0 * w0d + bx2_1 * w1d + bx2_2 * w2d; - - // Bx3 (dual grid) - const auto bx3_0 = EB(i - 1, em::bx3); - const auto bx3_1 = EB(i, em::bx3); - const auto bx3_2 = EB(i + 1, em::bx3); - b0[2] = bx3_0 * w0d + bx3_1 * w1d + bx3_2 * w2d; + // ToDo: implement template in srpic.hpp + const unsigned int O = 2u; + + // ToDo: change to 1u! + if constexpr (O == 0u) { + + if constexpr (D == Dim::_1D) { + const int i { i1(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + + // direct interpolation - Arno + int indx = static_cast(dx1_ + HALF); + + // first order + real_t c0, c1; + + real_t ponpmx = ONE - dx1_; + real_t ponppx = dx1_; + + real_t pondmx = static_cast(indx + ONE) - (dx1_ + HALF); + real_t pondpx = ONE - pondmx; + + // Ex1 + // Interpolate --- (dual) + c0 = EB(i - 1 + indx, em::ex1); + c1 = EB(i + indx, em::ex1); + e0[0] = c0 * pondmx + c1 * pondpx; + // Ex2 + // Interpolate --- (primal) + c0 = EB(i, em::ex2); + c1 = EB(i + 1, em::ex2); + e0[1] = c0 * ponpmx + c1 * ponppx; + // Ex3 + // Interpolate --- (primal) + c0 = EB(i, em::ex3); + c1 = EB(i + 1, em::ex3); + e0[2] = c0 * ponpmx + c1 * ponppx; + // Bx1 + // Interpolate --- (primal) + c0 = EB(i, em::bx1); + c1 = EB(i + 1, em::bx1); + b0[0] = c0 * ponpmx + c1 * ponppx; + // Bx2 + // Interpolate --- (dual) + c0 = EB(i - 1 + indx, em::bx2); + 
c1 = EB(i + indx, em::bx2); + b0[1] = c0 * pondmx + c1 * pondpx; + // Bx3 + // Interpolate --- (dual) + c0 = EB(i - 1 + indx, em::bx3); + c1 = EB(i + indx, em::bx3); + b0[2] = c0 * pondmx + c1 * pondpx; + } else if constexpr (D == Dim::_2D) { + const int i { i1(p) + static_cast(N_GHOSTS) }; + const int j { i2(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + const auto dx2_ { static_cast(dx2(p)) }; + + // direct interpolation - Arno + int indx = static_cast(dx1_ + HALF); + int indy = static_cast(dx2_ + HALF); + + // first order + real_t c000, c100, c010, c110, c00, c10; + + real_t ponpmx = ONE - dx1_; + real_t ponppx = dx1_; + real_t ponpmy = ONE - dx2_; + real_t ponppy = dx2_; + + real_t pondmx = static_cast(indx + ONE) - (dx1_ + HALF); + real_t pondpx = ONE - pondmx; + real_t pondmy = static_cast(indy + ONE) - (dx2_ + HALF); + real_t pondpy = ONE - pondmy; + + // Ex1 + // Interpolate --- (dual, primal) + c000 = EB(i - 1 + indx, j, em::ex1); + c100 = EB(i + indx, j, em::ex1); + c010 = EB(i - 1 + indx, j + 1, em::ex1); + c110 = EB(i + indx, j + 1, em::ex1); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + e0[0] = c00 * ponpmy + c10 * ponppy; + // Ex2 + // Interpolate -- (primal, dual) + c000 = EB(i, j - 1 + indy, em::ex2); + c100 = EB(i + 1, j - 1 + indy, em::ex2); + c010 = EB(i, j + indy, em::ex2); + c110 = EB(i + 1, j + indy, em::ex2); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + e0[1] = c00 * pondmy + c10 * pondpy; + // Ex3 + // Interpolate -- (primal, primal) + c000 = EB(i, j, em::ex3); + c100 = EB(i + 1, j, em::ex3); + c010 = EB(i, j + 1, em::ex3); + c110 = EB(i + 1, j + 1, em::ex3); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + e0[2] = c00 * ponpmy + c10 * ponppy; + + // Bx1 + // Interpolate -- (primal, dual) + c000 = EB(i, j - 1 + indy, em::bx1); + c100 = EB(i + 1, j - 1 + indy, em::bx1); + c010 = EB(i, j + indy, em::bx1); + c110 = 
EB(i + 1, j + indy, em::bx1); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + b0[0] = c00 * pondmy + c10 * pondpy; + // Bx2 + // Interpolate -- (dual, primal) + c000 = EB(i - 1 + indx, j, em::bx2); + c100 = EB(i + indx, j, em::bx2); + c010 = EB(i - 1 + indx, j + 1, em::bx2); + c110 = EB(i + indx, j + 1, em::bx2); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + b0[1] = c00 * ponpmy + c10 * ponppy; + // Bx3 + // Interpolate -- (dual, dual) + c000 = EB(i - 1 + indx, j - 1 + indy, em::bx3); + c100 = EB(i + indx, j - 1 + indy, em::bx3); + c010 = EB(i - 1 + indx, j + indy, em::bx3); + c110 = EB(i + indx, j + indy, em::bx3); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + b0[2] = c00 * pondmy + c10 * pondpy; + } else if constexpr (D == Dim::_3D) { + const int i { i1(p) + static_cast(N_GHOSTS) }; + const int j { i2(p) + static_cast(N_GHOSTS) }; + const int k { i3(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + const auto dx2_ { static_cast(dx2(p)) }; + const auto dx3_ { static_cast(dx3(p)) }; + + // direct interpolation - Arno + int indx = static_cast(dx1_ + HALF); + int indy = static_cast(dx2_ + HALF); + int indz = static_cast(dx3_ + HALF); + + // first order + real_t c000, c100, c010, c110, c001, c101, c011, c111, c00, c10, c01, + c11, c0, c1; + + real_t ponpmx = ONE - dx1_; + real_t ponppx = dx1_; + real_t ponpmy = ONE - dx2_; + real_t ponppy = dx2_; + real_t ponpmz = ONE - dx3_; + real_t ponppz = dx3_; + + real_t pondmx = static_cast(indx + ONE) - (dx1_ + HALF); + real_t pondpx = ONE - pondmx; + real_t pondmy = static_cast(indy + ONE) - (dx2_ + HALF); + real_t pondpy = ONE - pondmy; + real_t pondmz = static_cast(indz + ONE) - (dx3_ + HALF); + real_t pondpz = ONE - pondmz; + + // Ex1 + // Interpolate --- (dual, primal, primal) + c000 = EB(i - 1 + indx, j, k, em::ex1); + c100 = EB(i + indx, j, k, em::ex1); + c010 = EB(i - 1 + indx, j + 1, k, 
em::ex1); + c110 = EB(i + indx, j + 1, k, em::ex1); + c001 = EB(i - 1 + indx, j, k + 1, em::ex1); + c101 = EB(i + indx, j, k + 1, em::ex1); + c011 = EB(i - 1 + indx, j + 1, k + 1, em::ex1); + c111 = EB(i + indx, j + 1, k + 1, em::ex1); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + c0 = c00 * ponpmy + c10 * ponppy; + c01 = c001 * pondmx + c101 * pondpx; + c11 = c011 * pondmx + c111 * pondpx; + c1 = c01 * ponpmy + c11 * ponppy; + e0[0] = c0 * ponpmz + c1 * ponppz; + // Ex2 + // Interpolate -- (primal, dual, primal) + c000 = EB(i, j - 1 + indy, k, em::ex2); + c100 = EB(i + 1, j - 1 + indy, k, em::ex2); + c010 = EB(i, j + indy, k, em::ex2); + c110 = EB(i + 1, j + indy, k, em::ex2); + c001 = EB(i, j - 1 + indy, k + 1, em::ex2); + c101 = EB(i + 1, j - 1 + indy, k + 1, em::ex2); + c011 = EB(i, j + indy, k + 1, em::ex2); + c111 = EB(i + 1, j + indy, k + 1, em::ex2); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + c0 = c00 * pondmy + c10 * pondpy; + c01 = c001 * ponpmx + c101 * ponppx; + c11 = c011 * ponpmx + c111 * ponppx; + c1 = c01 * pondmy + c11 * pondpy; + e0[1] = c0 * ponpmz + c1 * ponppz; + // Ex3 + // Interpolate -- (primal, primal, dual) + c000 = EB(i, j, k - 1 + indz, em::ex3); + c100 = EB(i + 1, j, k - 1 + indz, em::ex3); + c010 = EB(i, j + 1, k - 1 + indz, em::ex3); + c110 = EB(i + 1, j + 1, k - 1 + indz, em::ex3); + c001 = EB(i, j, k + indz, em::ex3); + c101 = EB(i + 1, j, k + indz, em::ex3); + c011 = EB(i, j + 1, k + indz, em::ex3); + c111 = EB(i + 1, j + 1, k + indz, em::ex3); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + c0 = c00 * ponpmy + c10 * ponppy; + c01 = c001 * ponpmx + c101 * ponppx; + c11 = c011 * ponpmx + c111 * ponppx; + c1 = c01 * ponpmy + c11 * ponppy; + e0[2] = c0 * pondmz + c1 * pondpz; + + // Bx1 + // Interpolate -- (primal, dual, dual) + c000 = EB(i, j - 1 + indy, k - 1 + indz, em::bx1); + c100 = EB(i + 1, j - 1 + indy, k - 1 + indz, 
em::bx1); + c010 = EB(i, j + indy, k - 1 + indz, em::bx1); + c110 = EB(i + 1, j + indy, k - 1 + indz, em::bx1); + c001 = EB(i, j - 1 + indy, k + indz, em::bx1); + c101 = EB(i + 1, j - 1 + indy, k + indz, em::bx1); + c011 = EB(i, j + indy, k + indz, em::bx1); + c111 = EB(i + 1, j + indy, k + indz, em::bx1); + c00 = c000 * ponpmx + c100 * ponppx; + c10 = c010 * ponpmx + c110 * ponppx; + c0 = c00 * pondmy + c10 * pondpy; + c01 = c001 * ponpmx + c101 * ponppx; + c11 = c011 * ponpmx + c111 * ponppx; + c1 = c01 * pondmy + c11 * pondpy; + b0[0] = c0 * pondmz + c1 * pondpz; + // Bx2 + // Interpolate -- (dual, primal, dual) + c000 = EB(i - 1 + indx, j, k - 1 + indz, em::bx2); + c100 = EB(i + indx, j, k - 1 + indz, em::bx2); + c010 = EB(i - 1 + indx, j + 1, k - 1 + indz, em::bx2); + c110 = EB(i + indx, j + 1, k - 1 + indz, em::bx2); + c001 = EB(i - 1 + indx, j, k + indz, em::bx2); + c101 = EB(i + indx, j, k + indz, em::bx2); + c011 = EB(i - 1 + indx, j + 1, k + indz, em::bx2); + c111 = EB(i + indx, j + 1, k + indz, em::bx2); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + c0 = c00 * ponpmy + c10 * ponppy; + c01 = c001 * pondmx + c101 * pondpx; + c11 = c011 * pondmx + c111 * pondpx; + c1 = c01 * ponpmy + c11 * ponppy; + b0[1] = c0 * pondmz + c1 * pondpz; + // Bx3 + // Interpolate -- (dual, dual, primal) + c000 = EB(i - 1 + indx, j - 1 + indy, k, em::bx3); + c100 = EB(i + indx, j - 1 + indy, k, em::bx3); + c010 = EB(i - 1 + indx, j + indy, k, em::bx3); + c110 = EB(i + indx, j + indy, k, em::bx3); + c001 = EB(i - 1 + indx, j - 1 + indy, k + 1, em::bx3); + c101 = EB(i + indx, j - 1 + indy, k + 1, em::bx3); + c011 = EB(i - 1 + indx, j + indy, k + 1, em::bx3); + c111 = EB(i + indx, j + indy, k + 1, em::bx3); + c00 = c000 * pondmx + c100 * pondpx; + c10 = c010 * pondmx + c110 * pondpx; + c0 = c00 * ponpmy + c10 * ponppy; + c01 = c001 * pondmx + c101 * pondpx; + c11 = c011 * pondmx + c111 * pondpx; + c1 = c01 * ponpmy + c11 * ponppy; + b0[2] = c0 * 
ponpmz + c1 * ponppz; + } + } else if constexpr ((O >= 1u) and (O <= 5u)) { + + if constexpr (D == Dim::_1D) { + const int i { i1(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + // primal and dual shape function + real_t Sp[O + 1], Sd[O + 1]; + // minimum contributing cells + int ip_min, id_min; + + // primal shape function - not staggered + prtl_shape::order(i, dx1_, ip_min, Sp); + + // dual shape function - staggered + prtl_shape::order(i, dx1_, id_min, Sd); + + // Ex1 -- dual + e0[0] = ZERO; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + e0[0] += Sd[idx1] * EB(id_min + idx1, em::ex1); + } - } else if constexpr (D == Dim::_2D) { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const int j { i2(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - const auto dx2_ { static_cast(dx2(p)) }; - - // direct interpolation of staggered grid - // primal = i+ind, dual = i - const int indx = static_cast(static_cast(dx1_ + HALF)); - const int indy = static_cast(static_cast(dx2_ + HALF)); - - // Compute weights for second-order interpolation - // primal - const auto w0px = HALF * SQR(HALF - dx1_ + static_cast(indx)); - const auto w1px = THREE_FOURTHS - SQR(dx1_ - static_cast(indx)); - const auto w2px = ONE - w0px - w1px; - const auto w0py = HALF * SQR(HALF - dx2_ + static_cast(indy)); - const auto w1py = THREE_FOURTHS - SQR(dx2_ - static_cast(indy)); - const auto w2py = ONE - w0py - w1py; - - // dual - const auto w0dx = HALF * SQR(ONE - dx1_); - const auto w2dx = HALF * SQR(dx1_); - const auto w1dx = ONE - w0dx - w2dx; - const auto w0dy = HALF * SQR(ONE - dx2_); - const auto w2dy = HALF * SQR(dx2_); - const auto w1dy = ONE - w0dy - w2dy; - - // Ex1 - // Interpolate --- (dual, primal) - // clang-format off - const auto ex1_000 = EB(i - 1, indy + j - 1, em::ex1); - const auto ex1_100 = EB(i, indy + j - 1, em::ex1); - const auto ex1_200 = EB(i + 1, indy + j - 1, em::ex1); - const auto ex1_010 = EB(i - 1, indy + j, em::ex1); - 
const auto ex1_110 = EB(i, indy + j, em::ex1); - const auto ex1_210 = EB(i + 1, indy + j, em::ex1); - const auto ex1_020 = EB(i - 1, indy + j + 1, em::ex1); - const auto ex1_120 = EB(i, indy + j + 1, em::ex1); - const auto ex1_220 = EB(i + 1, indy + j + 1, em::ex1); - // clang-format on - - const auto ex1_0 = ex1_000 * w0dx + ex1_100 * w1dx + ex1_200 * w2dx; - const auto ex1_1 = ex1_010 * w0dx + ex1_110 * w1dx + ex1_210 * w2dx; - const auto ex1_2 = ex1_020 * w0dx + ex1_120 * w1dx + ex1_220 * w2dx; - e0[0] = ex1_0 * w0py + ex1_1 * w1py + ex1_2 * w2py; - - // Ex2 - // Interpolate --- (primal, dual) - // clang-format off - const auto ex2_000 = EB(indx + i - 1, j - 1, em::ex2); - const auto ex2_100 = EB(indx + i, j - 1, em::ex2); - const auto ex2_200 = EB(indx + i + 1, j - 1, em::ex2); - const auto ex2_010 = EB(indx + i - 1, j, em::ex2); - const auto ex2_110 = EB(indx + i, j, em::ex2); - const auto ex2_210 = EB(indx + i + 1, j, em::ex2); - const auto ex2_020 = EB(indx + i - 1, j + 1, em::ex2); - const auto ex2_120 = EB(indx + i, j + 1, em::ex2); - const auto ex2_220 = EB(indx + i + 1, j + 1, em::ex2); - // clang-format on - - const auto ex2_0 = ex2_000 * w0px + ex2_100 * w1px + ex2_200 * w2px; - const auto ex2_1 = ex2_010 * w0px + ex2_110 * w1px + ex2_210 * w2px; - const auto ex2_2 = ex2_020 * w0px + ex2_120 * w1px + ex2_220 * w2px; - e0[1] = ex2_0 * w0dy + ex2_1 * w1dy + ex2_2 * w2dy; - - // Ex3 - // Interpolate --- (primal, primal) - // clang-format off - const auto ex3_000 = EB(indx + i - 1, indy + j - 1, em::ex3); - const auto ex3_100 = EB(indx + i, indy + j - 1, em::ex3); - const auto ex3_200 = EB(indx + i + 1, indy + j - 1, em::ex3); - const auto ex3_010 = EB(indx + i - 1, indy + j, em::ex3); - const auto ex3_110 = EB(indx + i, indy + j, em::ex3); - const auto ex3_210 = EB(indx + i + 1, indy + j, em::ex3); - const auto ex3_020 = EB(indx + i - 1, indy + j + 1, em::ex3); - const auto ex3_120 = EB(indx + i, indy + j + 1, em::ex3); - const auto ex3_220 = EB(indx + i 
+ 1, indy + j + 1, em::ex3); - // clang-format on - - const auto ex3_0 = ex3_000 * w0px + ex3_100 * w1px + ex3_200 * w2px; - const auto ex3_1 = ex3_010 * w0px + ex3_110 * w1px + ex3_210 * w2px; - const auto ex3_2 = ex3_020 * w0px + ex3_120 * w1px + ex3_220 * w2px; - e0[2] = ex3_0 * w0py + ex3_1 * w1py + ex3_2 * w2py; - - // Bx1 - // Interpolate --- (primal, dual) - // clang-format off - const auto bx1_000 = EB(indx + i - 1, j - 1, em::bx1); - const auto bx1_100 = EB(indx + i, j - 1, em::bx1); - const auto bx1_200 = EB(indx + i + 1, j - 1, em::bx1); - const auto bx1_010 = EB(indx + i - 1, j, em::bx1); - const auto bx1_110 = EB(indx + i, j, em::bx1); - const auto bx1_210 = EB(indx + i + 1, j, em::bx1); - const auto bx1_020 = EB(indx + i - 1, j + 1, em::bx1); - const auto bx1_120 = EB(indx + i, j + 1, em::bx1); - const auto bx1_220 = EB(indx + i + 1, j + 1, em::bx1); - // clang-format on - - const auto bx1_0 = bx1_000 * w0px + bx1_100 * w1px + bx1_200 * w2px; - const auto bx1_1 = bx1_010 * w0px + bx1_110 * w1px + bx1_210 * w2px; - const auto bx1_2 = bx1_020 * w0px + bx1_120 * w1px + bx1_220 * w2px; - b0[0] = bx1_0 * w0dy + bx1_1 * w1dy + bx1_2 * w2dy; - - // Bx2 - // Interpolate --- (dual, primal) - // clang-format off - const auto bx2_000 = EB(i - 1, indy + j - 1, em::bx2); - const auto bx2_100 = EB(i, indy + j - 1, em::bx2); - const auto bx2_200 = EB(i + 1, indy + j - 1, em::bx2); - const auto bx2_010 = EB(i - 1, indy + j, em::bx2); - const auto bx2_110 = EB(i, indy + j, em::bx2); - const auto bx2_210 = EB(i + 1, indy + j, em::bx2); - const auto bx2_020 = EB(i - 1, indy + j + 1, em::bx2); - const auto bx2_120 = EB(i, indy + j + 1, em::bx2); - const auto bx2_220 = EB(i + 1, indy + j + 1, em::bx2); - // clang-format on - - const auto bx2_0 = bx2_000 * w0dx + bx2_100 * w1dx + bx2_200 * w2dx; - const auto bx2_1 = bx2_010 * w0dx + bx2_110 * w1dx + bx2_210 * w2dx; - const auto bx2_2 = bx2_020 * w0dx + bx2_120 * w1dx + bx2_220 * w2dx; - b0[1] = bx2_0 * w0py + bx2_1 * w1py 
+ bx2_2 * w2py; - - // Bx3 - // Interpolate --- (dual, dual) - // clang-format off - const auto bx3_000 = EB(i - 1, j - 1, em::bx3); - const auto bx3_100 = EB(i, j - 1, em::bx3); - const auto bx3_200 = EB(i + 1, j - 1, em::bx3); - const auto bx3_010 = EB(i - 1, j, em::bx3); - const auto bx3_110 = EB(i, j, em::bx3); - const auto bx3_210 = EB(i + 1, j, em::bx3); - const auto bx3_020 = EB(i - 1, j + 1, em::bx3); - const auto bx3_120 = EB(i, j + 1, em::bx3); - const auto bx3_220 = EB(i + 1, j + 1, em::bx3); - // clang-format on - - const auto bx3_0 = bx3_000 * w0dx + bx3_100 * w1dx + bx3_200 * w2dx; - const auto bx3_1 = bx3_010 * w0dx + bx3_110 * w1dx + bx3_210 * w2dx; - const auto bx3_2 = bx3_020 * w0dx + bx3_120 * w1dx + bx3_220 * w2dx; - b0[2] = bx3_0 * w0dy + bx3_1 * w1dy + bx3_2 * w2dy; + // Ex2 -- primal + e0[1] = ZERO; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + e0[1] += Sp[idx1] * EB(ip_min + idx1, em::ex2); + } - } else if constexpr (D == Dim::_3D) { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const int j { i2(p) + static_cast(N_GHOSTS) }; - const int k { i3(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - const auto dx2_ { static_cast(dx2(p)) }; - const auto dx3_ { static_cast(dx3(p)) }; - - // direct interpolation of staggered grid - // primal = i+ind, dual = i - const int indx = static_cast(static_cast(dx1_ + HALF)); - const int indy = static_cast(static_cast(dx2_ + HALF)); - const int indz = static_cast(static_cast(dx3_ + HALF)); - - // Compute weights for second-order interpolation - // primal - const auto w0px = HALF * SQR(HALF - dx1_ + static_cast(indx)); - const auto w1px = THREE_FOURTHS - SQR(dx1_ - static_cast(indx)); - const auto w2px = ONE - w0px - w1px; - const auto w0py = HALF * SQR(HALF - dx2_ + static_cast(indy)); - const auto w1py = THREE_FOURTHS - SQR(dx2_ - static_cast(indy)); - const auto w2py = ONE - w0py - w1py; - const auto w0pz = HALF * SQR(HALF - dx3_ + static_cast(indz)); - const auto w1pz = 
THREE_FOURTHS - SQR(dx3_ - static_cast(indz)); - const auto w2pz = ONE - w0pz - w1pz; - - // dual - const auto w0dx = HALF * SQR(ONE - dx1_); - const auto w2dx = HALF * SQR(dx1_); - const auto w1dx = ONE - w0dx - w2dx; - const auto w0dy = HALF * SQR(ONE - dx2_); - const auto w2dy = HALF * SQR(dx2_); - const auto w1dy = ONE - w0dy - w2dy; - const auto w0dz = HALF * SQR(ONE - dx3_); - const auto w2dz = HALF * SQR(dx3_); - const auto w1dz = ONE - w0dz - w2dz; - - // Ex1 - // Interpolate --- (dual, primal, primal) - // clang-format off - const auto ex1_000 = EB(i - 1, indy + j - 1, indz + k - 1, em::ex1); - const auto ex1_100 = EB(i, indy + j - 1, indz + k - 1, em::ex1); - const auto ex1_200 = EB(i + 1, indy + j - 1, indz + k - 1, em::ex1); - const auto ex1_010 = EB(i - 1, indy + j, indz + k - 1, em::ex1); - const auto ex1_110 = EB(i, indy + j, indz + k - 1, em::ex1); - const auto ex1_210 = EB(i + 1, indy + j, indz + k - 1, em::ex1); - const auto ex1_020 = EB(i - 1, indy + j + 1, indz + k - 1, em::ex1); - const auto ex1_120 = EB(i, indy + j + 1, indz + k - 1, em::ex1); - const auto ex1_220 = EB(i + 1, indy + j + 1, indz + k - 1, em::ex1); - const auto ex1_001 = EB(i - 1, indy + j - 1, indz + k, em::ex1); - const auto ex1_101 = EB(i, indy + j - 1, indz + k, em::ex1); - const auto ex1_201 = EB(i + 1, indy + j - 1, indz + k, em::ex1); - const auto ex1_011 = EB(i - 1, indy + j, indz + k, em::ex1); - const auto ex1_111 = EB(i, indy + j, indz + k, em::ex1); - const auto ex1_211 = EB(i + 1, indy + j, indz + k, em::ex1); - const auto ex1_021 = EB(i - 1, indy + j + 1, indz + k, em::ex1); - const auto ex1_121 = EB(i, indy + j + 1, indz + k, em::ex1); - const auto ex1_221 = EB(i + 1, indy + j + 1, indz + k, em::ex1); - const auto ex1_002 = EB(i - 1, indy + j - 1, indz + k + 1, em::ex1); - const auto ex1_102 = EB(i, indy + j - 1, indz + k + 1, em::ex1); - const auto ex1_202 = EB(i + 1, indy + j - 1, indz + k + 1, em::ex1); - const auto ex1_012 = EB(i - 1, indy + j, indz + k + 1, 
em::ex1); - const auto ex1_112 = EB(i, indy + j, indz + k + 1, em::ex1); - const auto ex1_212 = EB(i + 1, indy + j, indz + k + 1, em::ex1); - const auto ex1_022 = EB(i - 1, indy + j + 1, indz + k + 1, em::ex1); - const auto ex1_122 = EB(i, indy + j + 1, indz + k + 1, em::ex1); - const auto ex1_222 = EB(i + 1, indy + j + 1, indz + k + 1, em::ex1); - // clang-format on - - const auto ex1_0_0 = ex1_000 * w0dx + ex1_100 * w1dx + ex1_200 * w2dx; - const auto ex1_1_0 = ex1_010 * w0dx + ex1_110 * w1dx + ex1_210 * w2dx; - const auto ex1_2_0 = ex1_020 * w0dx + ex1_120 * w1dx + ex1_220 * w2dx; - const auto ex1_0_1 = ex1_001 * w0dx + ex1_101 * w1dx + ex1_201 * w2dx; - const auto ex1_1_1 = ex1_011 * w0dx + ex1_111 * w1dx + ex1_211 * w2dx; - const auto ex1_2_1 = ex1_021 * w0dx + ex1_121 * w1dx + ex1_221 * w2dx; - const auto ex1_0_2 = ex1_002 * w0dx + ex1_102 * w1dx + ex1_202 * w2dx; - const auto ex1_1_2 = ex1_012 * w0dx + ex1_112 * w1dx + ex1_212 * w2dx; - const auto ex1_2_2 = ex1_022 * w0dx + ex1_122 * w1dx + ex1_222 * w2dx; - - const auto ex1_00 = ex1_0_0 * w0py + ex1_1_0 * w1py + ex1_2_0 * w2py; - const auto ex1_01 = ex1_0_1 * w0py + ex1_1_1 * w1py + ex1_2_1 * w2py; - const auto ex1_02 = ex1_0_2 * w0py + ex1_1_2 * w1py + ex1_2_2 * w2py; - - e0[0] = ex1_00 * w0pz + ex1_01 * w1pz + ex1_02 * w2pz; - - // Ex2 - // Interpolate -- (primal, dual, primal) - // clang-format off - const auto ex2_000 = EB(indx + i - 1, j - 1, indz + k - 1, em::ex2); - const auto ex2_100 = EB(indx + i, j - 1, indz + k - 1, em::ex2); - const auto ex2_200 = EB(indx + i + 1, j - 1, indz + k - 1, em::ex2); - const auto ex2_010 = EB(indx + i - 1, j, indz + k - 1, em::ex2); - const auto ex2_110 = EB(indx + i, j, indz + k - 1, em::ex2); - const auto ex2_210 = EB(indx + i + 1, j, indz + k - 1, em::ex2); - const auto ex2_020 = EB(indx + i - 1, j + 1, indz + k - 1, em::ex2); - const auto ex2_120 = EB(indx + i, j + 1, indz + k - 1, em::ex2); - const auto ex2_220 = EB(indx + i + 1, j + 1, indz + k - 1, em::ex2); - 
const auto ex2_001 = EB(indx + i - 1, j - 1, indz + k, em::ex2); - const auto ex2_101 = EB(indx + i, j - 1, indz + k, em::ex2); - const auto ex2_201 = EB(indx + i + 1, j - 1, indz + k, em::ex2); - const auto ex2_011 = EB(indx + i - 1, j, indz + k, em::ex2); - const auto ex2_111 = EB(indx + i, j, indz + k, em::ex2); - const auto ex2_211 = EB(indx + i + 1, j, indz + k, em::ex2); - const auto ex2_021 = EB(indx + i - 1, j + 1, indz + k, em::ex2); - const auto ex2_121 = EB(indx + i, j + 1, indz + k, em::ex2); - const auto ex2_221 = EB(indx + i + 1, j + 1, indz + k, em::ex2); - const auto ex2_002 = EB(indx + i - 1, j - 1, indz + k + 1, em::ex2); - const auto ex2_102 = EB(indx + i, j - 1, indz + k + 1, em::ex2); - const auto ex2_202 = EB(indx + i + 1, j - 1, indz + k + 1, em::ex2); - const auto ex2_012 = EB(indx + i - 1, j, indz + k + 1, em::ex2); - const auto ex2_112 = EB(indx + i, j, indz + k + 1, em::ex2); - const auto ex2_212 = EB(indx + i + 1, j, indz + k + 1, em::ex2); - const auto ex2_022 = EB(indx + i - 1, j + 1, indz + k + 1, em::ex2); - const auto ex2_122 = EB(indx + i, j + 1, indz + k + 1, em::ex2); - const auto ex2_222 = EB(indx + i + 1, j + 1, indz + k + 1, em::ex2); - // clang-format on - - const auto ex2_0_0 = ex2_000 * w0px + ex2_100 * w1px + ex1_200 * w2px; - const auto ex2_1_0 = ex2_010 * w0px + ex2_110 * w1px + ex1_210 * w2px; - const auto ex2_2_0 = ex2_020 * w0px + ex2_120 * w1px + ex1_220 * w2px; - const auto ex2_0_1 = ex2_001 * w0px + ex2_101 * w1px + ex2_201 * w2px; - const auto ex2_1_1 = ex2_011 * w0px + ex2_111 * w1px + ex2_211 * w2px; - const auto ex2_2_1 = ex2_021 * w0px + ex2_121 * w1px + ex2_221 * w2px; - const auto ex2_0_2 = ex2_002 * w0px + ex2_102 * w1px + ex2_202 * w2px; - const auto ex2_1_2 = ex2_012 * w0px + ex2_112 * w1px + ex2_212 * w2px; - const auto ex2_2_2 = ex2_022 * w0px + ex2_122 * w1px + ex2_222 * w2px; - - const auto ex2_00 = ex2_0_0 * w0dy + ex2_1_0 * w1dy + ex2_2_0 * w2dy; - const auto ex2_01 = ex2_0_1 * w0dy + ex2_1_1 * w1dy 
+ ex2_2_1 * w2dy; - const auto ex2_02 = ex2_0_2 * w0dy + ex2_1_2 * w1dy + ex2_2_2 * w2dy; - - e0[1] = ex2_00 * w0pz + ex2_01 * w1pz + ex2_02 * w2pz; - - // Ex3 - // Interpolate -- (primal, primal, dual) - // clang-format off - const auto ex3_000 = EB(indx + i - 1, indy + j - 1, k - 1, em::ex3); - const auto ex3_100 = EB(indx + i, indy + j - 1, k - 1, em::ex3); - const auto ex3_200 = EB(indx + i + 1, indy + j - 1, k - 1, em::ex3); - const auto ex3_010 = EB(indx + i - 1, indy + j, k - 1, em::ex3); - const auto ex3_110 = EB(indx + i, indy + j, k - 1, em::ex3); - const auto ex3_210 = EB(indx + i + 1, indy + j, k - 1, em::ex3); - const auto ex3_020 = EB(indx + i - 1, indy + j + 1, k - 1, em::ex3); - const auto ex3_120 = EB(indx + i, indy + j + 1, k - 1, em::ex3); - const auto ex3_220 = EB(indx + i + 1, indy + j + 1, k - 1, em::ex3); - const auto ex3_001 = EB(indx + i - 1, indy + j - 1, k, em::ex3); - const auto ex3_101 = EB(indx + i, indy + j - 1, k, em::ex3); - const auto ex3_201 = EB(indx + i + 1, indy + j - 1, k, em::ex3); - const auto ex3_011 = EB(indx + i - 1, indy + j, k, em::ex3); - const auto ex3_111 = EB(indx + i, indy + j, k, em::ex3); - const auto ex3_211 = EB(indx + i + 1, indy + j, k, em::ex3); - const auto ex3_021 = EB(indx + i - 1, indy + j + 1, k, em::ex3); - const auto ex3_121 = EB(indx + i, indy + j + 1, k, em::ex3); - const auto ex3_221 = EB(indx + i + 1, indy + j + 1, k, em::ex3); - const auto ex3_002 = EB(indx + i - 1, indy + j - 1, k + 1, em::ex3); - const auto ex3_102 = EB(indx + i, indy + j - 1, k + 1, em::ex3); - const auto ex3_202 = EB(indx + i + 1, indy + j - 1, k + 1, em::ex3); - const auto ex3_012 = EB(indx + i - 1, indy + j, k + 1, em::ex3); - const auto ex3_112 = EB(indx + i, indy + j, k + 1, em::ex3); - const auto ex3_212 = EB(indx + i + 1, indy + j, k + 1, em::ex3); - const auto ex3_022 = EB(indx + i - 1, indy + j + 1, k + 1, em::ex3); - const auto ex3_122 = EB(indx + i, indy + j + 1, k + 1, em::ex3); - const auto ex3_222 = EB(indx + i + 
1, indy + j + 1, k + 1, em::ex3); - // clang-format on - - const auto ex3_0_0 = ex3_000 * w0px + ex3_100 * w1px + ex3_200 * w2px; - const auto ex3_1_0 = ex3_010 * w0px + ex3_110 * w1px + ex3_210 * w2px; - const auto ex3_2_0 = ex3_020 * w0px + ex3_120 * w1px + ex3_220 * w2px; - const auto ex3_0_1 = ex3_001 * w0px + ex3_101 * w1px + ex3_201 * w2px; - const auto ex3_1_1 = ex3_011 * w0px + ex3_111 * w1px + ex3_211 * w2px; - const auto ex3_2_1 = ex3_021 * w0px + ex3_121 * w1px + ex3_221 * w2px; - const auto ex3_0_2 = ex3_002 * w0px + ex3_102 * w1px + ex3_202 * w2px; - const auto ex3_1_2 = ex3_012 * w0px + ex3_112 * w1px + ex3_212 * w2px; - const auto ex3_2_2 = ex3_022 * w0px + ex3_122 * w1px + ex3_222 * w2px; - - const auto ex3_00 = ex3_0_0 * w0py + ex3_1_0 * w1py + ex3_2_0 * w2py; - const auto ex3_01 = ex3_0_1 * w0py + ex3_1_1 * w1py + ex3_2_1 * w2py; - const auto ex3_02 = ex3_0_2 * w0py + ex3_1_2 * w1py + ex3_2_2 * w2py; - - e0[2] = ex3_00 * w0dz + ex3_01 * w1dz + ex3_02 * w2dz; - - // Bx1 - // Interpolate -- (primal, dual, dual) - // clang-format off - const auto bx1_000 = EB(indx + i - 1, j - 1, k - 1, em::bx1); - const auto bx1_100 = EB(indx + i, j - 1, k - 1, em::bx1); - const auto bx1_200 = EB(indx + i + 1, j - 1, k - 1, em::bx1); - const auto bx1_010 = EB(indx + i - 1, j, k - 1, em::bx1); - const auto bx1_110 = EB(indx + i, j, k - 1, em::bx1); - const auto bx1_210 = EB(indx + i + 1, j, k - 1, em::bx1); - const auto bx1_020 = EB(indx + i - 1, j + 1, k - 1, em::bx1); - const auto bx1_120 = EB(indx + i, j + 1, k - 1, em::bx1); - const auto bx1_220 = EB(indx + i + 1, j + 1, k - 1, em::bx1); - const auto bx1_001 = EB(indx + i - 1, j - 1, k, em::bx1); - const auto bx1_101 = EB(indx + i, j - 1, k, em::bx1); - const auto bx1_201 = EB(indx + i + 1, j - 1, k, em::bx1); - const auto bx1_011 = EB(indx + i - 1, j, k, em::bx1); - const auto bx1_111 = EB(indx + i, j, k, em::bx1); - const auto bx1_211 = EB(indx + i + 1, j, k, em::bx1); - const auto bx1_021 = EB(indx + i - 1, j 
+ 1, k, em::bx1); - const auto bx1_121 = EB(indx + i, j + 1, k, em::bx1); - const auto bx1_221 = EB(indx + i + 1, j + 1, k, em::bx1); - const auto bx1_002 = EB(indx + i - 1, j - 1, k + 1, em::bx1); - const auto bx1_102 = EB(indx + i, j - 1, k + 1, em::bx1); - const auto bx1_202 = EB(indx + i + 1, j - 1, k + 1, em::bx1); - const auto bx1_012 = EB(indx + i - 1, j, k + 1, em::bx1); - const auto bx1_112 = EB(indx + i, j, k + 1, em::bx1); - const auto bx1_212 = EB(indx + i + 1, j, k + 1, em::bx1); - const auto bx1_022 = EB(indx + i - 1, j + 1, k + 1, em::bx1); - const auto bx1_122 = EB(indx + i, j + 1, k + 1, em::bx1); - const auto bx1_222 = EB(indx + i + 1, j + 1, k + 1, em::bx1); - // clang-format on - - const auto bx1_0_0 = bx1_000 * w0px + bx1_100 * w1px + bx1_200 * w2px; - const auto bx1_1_0 = bx1_010 * w0px + bx1_110 * w1px + bx1_210 * w2px; - const auto bx1_2_0 = bx1_020 * w0px + bx1_120 * w1px + bx1_220 * w2px; - const auto bx1_0_1 = bx1_001 * w0px + bx1_101 * w1px + bx1_201 * w2px; - const auto bx1_1_1 = bx1_011 * w0px + bx1_111 * w1px + bx1_211 * w2px; - const auto bx1_2_1 = bx1_021 * w0px + bx1_121 * w1px + bx1_221 * w2px; - const auto bx1_0_2 = bx1_002 * w0px + bx1_102 * w1px + bx1_202 * w2px; - const auto bx1_1_2 = bx1_012 * w0px + bx1_112 * w1px + bx1_212 * w2px; - const auto bx1_2_2 = bx1_022 * w0px + bx1_122 * w1px + bx1_222 * w2px; - - const auto bx1_00 = bx1_0_0 * w0dy + bx1_1_0 * w1dy + bx1_2_0 * w2dy; - const auto bx1_01 = bx1_0_1 * w0dy + bx1_1_1 * w1dy + bx1_2_1 * w2dy; - const auto bx1_02 = bx1_0_2 * w0dy + bx1_1_2 * w1dy + bx1_2_2 * w2dy; - - b0[0] = bx1_00 * w0dz + bx1_01 * w1dz + bx1_02 * w2dz; - - // Bx2 - // Interpolate -- (dual, primal, dual) - // clang-format off - const auto bx2_000 = EB(i - 1, indy + j - 1, k - 1, em::bx2); - const auto bx2_100 = EB(i, indy + j - 1, k - 1, em::bx2); - const auto bx2_200 = EB(i + 1, indy + j - 1, k - 1, em::bx2); - const auto bx2_010 = EB(i - 1, indy + j, k - 1, em::bx2); - const auto bx2_110 = EB(i, indy 
+ j, k - 1, em::bx2); - const auto bx2_210 = EB(i + 1, indy + j, k - 1, em::bx2); - const auto bx2_020 = EB(i - 1, indy + j + 1, k - 1, em::bx2); - const auto bx2_120 = EB(i, indy + j + 1, k - 1, em::bx2); - const auto bx2_220 = EB(i + 1, indy + j + 1, k - 1, em::bx2); - const auto bx2_001 = EB(i - 1, indy + j - 1, k, em::bx2); - const auto bx2_101 = EB(i, indy + j - 1, k, em::bx2); - const auto bx2_201 = EB(i + 1, indy + j - 1, k, em::bx2); - const auto bx2_011 = EB(i - 1, indy + j, k, em::bx2); - const auto bx2_111 = EB(i, indy + j, k, em::bx2); - const auto bx2_211 = EB(i + 1, indy + j, k, em::bx2); - const auto bx2_021 = EB(i - 1, indy + j + 1, k, em::bx2); - const auto bx2_121 = EB(i, indy + j + 1, k, em::bx2); - const auto bx2_221 = EB(i + 1, indy + j + 1, k, em::bx2); - const auto bx2_002 = EB(i - 1, indy + j - 1, k + 1, em::bx2); - const auto bx2_102 = EB(i, indy + j - 1, k + 1, em::bx2); - const auto bx2_202 = EB(i + 1, indy + j - 1, k + 1, em::bx2); - const auto bx2_012 = EB(i - 1, indy + j, k + 1, em::bx2); - const auto bx2_112 = EB(i, indy + j, k + 1, em::bx2); - const auto bx2_212 = EB(i + 1, indy + j, k + 1, em::bx2); - const auto bx2_022 = EB(i - 1, indy + j + 1, k + 1, em::bx2); - const auto bx2_122 = EB(i, indy + j + 1, k + 1, em::bx2); - const auto bx2_222 = EB(i + 1, indy + j + 1, k + 1, em::bx2); - // clang-format on - - const auto bx2_0_0 = bx2_000 * w0dx + bx2_100 * w1dx + bx2_200 * w2dx; - const auto bx2_1_0 = bx2_010 * w0dx + bx2_110 * w1dx + bx2_210 * w2dx; - const auto bx2_2_0 = bx2_020 * w0dx + bx2_120 * w1dx + bx2_220 * w2dx; - const auto bx2_0_1 = bx2_001 * w0dx + bx2_101 * w1dx + bx2_201 * w2dx; - const auto bx2_1_1 = bx2_011 * w0dx + bx2_111 * w1dx + bx2_211 * w2dx; - const auto bx2_2_1 = bx2_021 * w0dx + bx2_121 * w1dx + bx2_221 * w2dx; - const auto bx2_0_2 = bx2_002 * w0dx + bx2_102 * w1dx + bx2_202 * w2dx; - const auto bx2_1_2 = bx2_012 * w0dx + bx2_112 * w1dx + bx2_212 * w2dx; - const auto bx2_2_2 = bx2_022 * w0dx + bx2_122 * w1dx 
+ bx2_222 * w2dx; - - const auto bx2_00 = bx2_0_0 * w0py + bx2_1_0 * w1py + bx2_2_0 * w2py; - const auto bx2_01 = bx2_0_1 * w0py + bx2_1_1 * w1py + bx2_2_1 * w2py; - const auto bx2_02 = bx2_0_2 * w0py + bx2_1_2 * w1py + bx2_2_2 * w2py; - - b0[1] = bx2_00 * w0dz + bx2_01 * w1dz + bx2_02 * w2dz; - - // Bx3 - // Interpolate -- (dual, dual, primal) - // clang-format off - const auto bx3_000 = EB(i - 1, j - 1, indz + k - 1, em::bx3); - const auto bx3_100 = EB(i, j - 1, indz + k - 1, em::bx3); - const auto bx3_200 = EB(i + 1, j - 1, indz + k - 1, em::bx3); - const auto bx3_010 = EB(i - 1, j, indz + k - 1, em::bx3); - const auto bx3_110 = EB(i, j, indz + k - 1, em::bx3); - const auto bx3_210 = EB(i + 1, j, indz + k - 1, em::bx3); - const auto bx3_020 = EB(i - 1, j + 1, indz + k - 1, em::bx3); - const auto bx3_120 = EB(i, j + 1, indz + k - 1, em::bx3); - const auto bx3_220 = EB(i + 1, j + 1, indz + k - 1, em::bx3); - const auto bx3_001 = EB(i - 1, j - 1, indz + k, em::bx3); - const auto bx3_101 = EB(i, j - 1, indz + k, em::bx3); - const auto bx3_201 = EB(i + 1, j - 1, indz + k, em::bx3); - const auto bx3_011 = EB(i - 1, j, indz + k, em::bx3); - const auto bx3_111 = EB(i, j, indz + k, em::bx3); - const auto bx3_211 = EB(i + 1, j, indz + k, em::bx3); - const auto bx3_021 = EB(i - 1, j + 1, indz + k, em::bx3); - const auto bx3_121 = EB(i, j + 1, indz + k, em::bx3); - const auto bx3_221 = EB(i + 1, j + 1, indz + k, em::bx3); - const auto bx3_002 = EB(i - 1, j - 1, indz + k + 1, em::bx3); - const auto bx3_102 = EB(i, j - 1, indz + k + 1, em::bx3); - const auto bx3_202 = EB(i + 1, j - 1, indz + k + 1, em::bx3); - const auto bx3_012 = EB(i - 1, j, indz + k + 1, em::bx3); - const auto bx3_112 = EB(i, j, indz + k + 1, em::bx3); - const auto bx3_212 = EB(i + 1, j, indz + k + 1, em::bx3); - const auto bx3_022 = EB(i - 1, j + 1, indz + k + 1, em::bx3); - const auto bx3_122 = EB(i, j + 1, indz + k + 1, em::bx3); - const auto bx3_222 = EB(i + 1, j + 1, indz + k + 1, em::bx3); - // 
clang-format on - - const auto bx3_0_0 = bx3_000 * w0dx + bx3_100 * w1dx + bx3_200 * w2dx; - const auto bx3_1_0 = bx3_010 * w0dx + bx3_110 * w1dx + bx3_210 * w2dx; - const auto bx3_2_0 = bx3_020 * w0dx + bx3_120 * w1dx + bx3_220 * w2dx; - const auto bx3_0_1 = bx3_001 * w0dx + bx3_101 * w1dx + bx3_201 * w2dx; - const auto bx3_1_1 = bx3_011 * w0dx + bx3_111 * w1dx + bx3_211 * w2dx; - const auto bx3_2_1 = bx3_021 * w0dx + bx3_121 * w1dx + bx3_221 * w2dx; - const auto bx3_0_2 = bx3_002 * w0dx + bx3_102 * w1dx + bx3_202 * w2dx; - const auto bx3_1_2 = bx3_012 * w0dx + bx3_112 * w1dx + bx3_212 * w2dx; - const auto bx3_2_2 = bx3_022 * w0dx + bx3_122 * w1dx + bx3_222 * w2dx; - - const auto bx3_00 = bx3_0_0 * w0dy + bx3_1_0 * w1dy + bx3_2_0 * w2dy; - const auto bx3_01 = bx3_0_1 * w0dy + bx3_1_1 * w1dy + bx3_2_1 * w2dy; - const auto bx3_02 = bx3_0_2 * w0dy + bx3_1_2 * w1dy + bx3_2_2 * w2dy; - - b0[2] = bx3_00 * w0pz + bx3_01 * w1pz + bx3_02 * w2pz; + // Ex3 -- primal + e0[2] = ZERO; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + e0[2] += Sp[idx1] * EB(ip_min + idx1, em::ex3); + } + + // Bx1 -- primal + b0[0] = ZERO; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + b0[0] += Sp[idx1] * EB(ip_min + idx1, em::bx1); + } + + // Bx2 -- dual + b0[1] = ZERO; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + b0[1] += Sd[idx1] * EB(id_min + idx1, em::bx2); + } + + // Bx3 -- dual + b0[2] = ZERO; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + b0[2] += Sd[idx1] * EB(id_min + idx1, em::bx3); + } + + } else if constexpr (D == Dim::_2D) { + + const int i { i1(p) + static_cast(N_GHOSTS) }; + const int j { i2(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + const auto dx2_ { static_cast(dx2(p)) }; + + // primal and dual shape function + real_t S1p[O + 1], S1d[O + 1]; + real_t S2p[O + 1], S2d[O + 1]; + // minimum contributing cells + int ip_min, id_min; + int jp_min, jd_min; + + // primal shape function - not staggered + prtl_shape::order(i, dx1_, ip_min, S1p); + 
prtl_shape::order(j, dx2_, jp_min, S2p); + // dual shape function - staggered + prtl_shape::order(i, dx1_, id_min, S1d); + prtl_shape::order(j, dx2_, jd_min, S2d); + + // Ex1 -- dual, primal + e0[0] = ZERO; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c0 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c0 += S1d[idx1] * EB(id_min + idx1, jp_min + idx2, em::ex1); + } + e0[0] += c0 * S2p[idx2]; + } + + // Ex2 -- primal, dual + e0[1] = ZERO; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c0 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c0 += S1p[idx1] * EB(ip_min + idx1, jd_min + idx2, em::ex2); + } + e0[1] += c0 * S2d[idx2]; + } + + // Ex3 -- primal, primal + e0[2] = ZERO; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c0 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c0 += S1p[idx1] * EB(ip_min + idx1, jp_min + idx2, em::ex3); + } + e0[2] += c0 * S2p[idx2]; + } + + // Bx1 -- primal, dual + b0[0] = ZERO; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c0 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c0 += S1p[idx1] * EB(ip_min + idx1, jd_min + idx2, em::bx1); + } + b0[0] += c0 * S2d[idx2]; + } + + // Bx2 -- dual, primal + b0[1] = ZERO; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c0 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c0 += S1d[idx1] * EB(id_min + idx1, jp_min + idx2, em::bx2); + } + b0[1] += c0 * S2p[idx2]; + } + + // Bx3 -- dual, dual + b0[2] = ZERO; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c0 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c0 += S1d[idx1] * EB(id_min + idx1, jd_min + idx2, em::bx3); + } + b0[2] += c0 * S2d[idx2]; + } + + } else if constexpr (D == Dim::_3D) { + + const int i { i1(p) + static_cast(N_GHOSTS) }; + const int j { i2(p) + static_cast(N_GHOSTS) }; + const int k { i3(p) + static_cast(N_GHOSTS) }; + const auto dx1_ { static_cast(dx1(p)) }; + const auto dx2_ { static_cast(dx2(p)) }; + const auto dx3_ { static_cast(dx3(p)) }; + + // primal 
and dual shape function + real_t S1p[O + 1], S1d[O + 1]; + real_t S2p[O + 1], S2d[O + 1]; + real_t S3p[O + 1], S3d[O + 1]; + + // minimum contributing cells + int ip_min, id_min; + int jp_min, jd_min; + int kp_min, kd_min; + + // primal shape function - not staggered + prtl_shape::order(i, dx1_, ip_min, S1p); + prtl_shape::order(j, dx2_, jp_min, S2p); + prtl_shape::order(k, dx3_, kp_min, S3p); + // dual shape function - staggered + prtl_shape::order(i, dx1_, id_min, S1d); + prtl_shape::order(j, dx2_, jd_min, S2d); + prtl_shape::order(k, dx3_, kd_min, S3d); + + // Ex1 -- dual, primal, primal + e0[0] = ZERO; + for (int idx3 = 0; idx3 < O + 1; idx3++) { + real_t c0 = 0.0; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c00 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c00 += S1d[idx1] * + EB(id_min + idx1, jp_min + idx2, kp_min + idx3, em::ex1); + } + c0 += c00 * S2p[idx2]; + } + e0[0] += c0 * S3p[idx3]; + } + + // Ex2 -- primal, dual, primal + e0[1] = ZERO; + for (int idx3 = 0; idx3 < O + 1; idx3++) { + real_t c0 = 0.0; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c00 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c00 += S1p[idx1] * + EB(ip_min + idx1, jd_min + idx2, kp_min + idx3, em::ex2); + } + c0 += c00 * S2d[idx2]; + } + e0[1] += c0 * S3p[idx3]; + } + + // Ex3 -- primal, primal, dual + e0[2] = ZERO; + for (int idx3 = 0; idx3 < O + 1; idx3++) { + real_t c0 = 0.0; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c00 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c00 += S1p[idx1] * + EB(ip_min + idx1, jp_min + idx2, kd_min + idx3, em::ex3); + } + c0 += c00 * S2p[idx2]; + } + e0[2] += c0 * S3d[idx3]; + } + + // Bx1 -- primal, dual, dual + b0[0] = ZERO; + for (int idx3 = 0; idx3 < O + 1; idx3++) { + real_t c0 = 0.0; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c00 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c00 += S1p[idx1] * + EB(ip_min + idx1, jd_min + idx2, kd_min + idx3, em::bx1); + } + c0 += c00 * 
S2d[idx2]; + } + b0[0] += c0 * S3d[idx3]; + } + + // Bx2 -- dual, primal, dual + b0[1] = ZERO; + for (int idx3 = 0; idx3 < O + 1; idx3++) { + real_t c0 = 0.0; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c00 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c00 += S1d[idx1] * + EB(id_min + idx1, jp_min + idx2, kd_min + idx3, em::bx2); + } + c0 += c00 * S2p[idx2]; + } + b0[1] += c0 * S3d[idx3]; + } + + // Bx3 -- dual, dual, primal + b0[2] = ZERO; + for (int idx3 = 0; idx3 < O + 1; idx3++) { + real_t c0 = 0.0; + for (int idx2 = 0; idx2 < O + 1; idx2++) { + real_t c00 = 0.0; + for (int idx1 = 0; idx1 < O + 1; idx1++) { + c00 += S1d[idx1] * + EB(id_min + idx1, jd_min + idx2, kp_min + idx3, em::bx3); + } + c0 += c00 * S2d[idx2]; + } + b0[2] += c0 * S3p[idx3]; + } + } } } From e1274d50248723e9f92b568cbebdeaf77a282ab7 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 13 Aug 2025 18:49:14 -0500 Subject: [PATCH 058/154] bugfixes for indexing --- src/kernels/currents_deposit.hpp | 54 ++++++++++---------------------- src/kernels/particle_shapes.hpp | 4 +-- 2 files changed, 19 insertions(+), 39 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 516108004..cb8862f3b 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -448,33 +448,22 @@ namespace kernel { for (int i = 0; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - // Esirkepov 2001, Eq. 38 - Wx1[i][j] = (fS_x1[i] - iS_x1[i]) * - (iS_x2[j] + HALF * (fS_x2[j] - iS_x2[j])); + // Esirkepov 2001, Eq. 
38 (simplified) + Wx1[i][j] = HALF * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] + iS_x2[j]); - Wx2[i][j] = (fS_x2[j] - iS_x2[j]) * - (iS_x2[j] + HALF * (fS_x1[i] - iS_x1[i])); + Wx2[i][j] = HALF * (fS_x1[i] + iS_x1[i]) * (fS_x2[j] - iS_x2[j]); - Wx3[i][j] = iS_x1[i] * iS_x2[j] + - HALF * (fS_x1[i] - fS_x1[i]) * iS_x2[j] + - HALF * iS_x1[i] * (fS_x2[j] - iS_x2[j]) + - THIRD * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] - iS_x2[j]); - - // Wx1[i][j] = HALF * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] + iS_x2[j]); - - // Wx2[i][j] = HALF * (fS_x1[i] + iS_x1[i]) * (fS_x2[j] - iS_x2[j]); - - // Wx3[i][j] = THIRD * (fS_x2[j] * (HALF * iS_x1[i] + fS_x2[j]) + - // iS_x2[j] * (HALF * fS_x2[j] + iS_x2[i])); + Wx3[i][j] = THIRD * (fS_x2[j] * (HALF * iS_x1[i] + fS_x1[i]) + + iS_x2[j] * (HALF * fS_x1[i] + iS_x1[i])); } } // contribution within the shape function stencil - real_t jx1[O + 2][O + 2], jx2[O + 2][O + 2], jx3[O + 2][O + 2]; + real_t jx1[O + 2][O + 2], jx2[O + 2][O + 2]; // prefactors for j update - const real_t Qdx1dt = -coeff * inv_dt; - const real_t Qdx2dt = -coeff * inv_dt; + const real_t Qdx1dt = coeff * inv_dt; + const real_t Qdx2dt = coeff * inv_dt; const real_t QVx3 = coeff * vp[2]; // Calculate current contribution @@ -482,37 +471,28 @@ namespace kernel { // jx1 #pragma unroll for (int j = 0; j < O + 2; ++j) { - jx1[0][j] = Wx1[0][j]; + jx1[0][j] = -Qdx1dt * Wx1[0][j]; } #pragma unroll for (int i = 1; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - jx1[i][j] = jx1[i - 1][j] + Wx1[i][j]; + jx1[i][j] = jx1[i - 1][j] - Qdx1dt * Wx1[i][j]; } } // jx2 #pragma unroll for (int i = 0; i < O + 2; ++i) { - jx2[i][0] = Wx2[i][0]; + jx2[i][0] = -Qdx2dt * Wx2[i][0]; } #pragma unroll for (int j = 1; j < O + 2; ++j) { #pragma unroll for (int i = 0; i < O + 2; ++i) { - jx2[i][j] = jx2[i][j - 1] + Wx2[i][j]; - } - } - - // jx3 -#pragma unroll - for (int i = 0; i < O + 2; ++i) { -#pragma unroll - for (int j = 0; j < O + 2; ++j) { - jx3[i][j] = Wx3[i][j]; + jx2[i][j] = jx2[i][j - 1] - 
Qdx2dt * Wx2[i][j]; } } @@ -531,21 +511,21 @@ namespace kernel { */ auto J_acc = J.access(); - for (int i = 0; i <= di_x1; ++i) { + for (int i = 0; i < di_x1; ++i) { for (int j = 0; j <= di_x2; ++j) { - J_acc(i1_min + i, i2_min + j, cur::jx1) += Qdx1dt * jx1[i][j]; + J_acc(i1_min + i, i2_min + j, cur::jx1) += jx1[i][j]; } } for (int i = 0; i <= di_x1; ++i) { - for (int j = 0; j <= di_x2; ++j) { - J_acc(i1_min + i, i2_min + j, cur::jx2) += Qdx2dt * jx2[i][j]; + for (int j = 0; j < di_x2; ++j) { + J_acc(i1_min + i, i2_min + j, cur::jx2) += jx2[i][j]; } } for (int i = 0; i <= di_x1; ++i) { for (int j = 0; j <= di_x2; ++j) { - J_acc(i1_min + i, i2_min + j, cur::jx3) += QVx3 * jx3[i][j]; + J_acc(i1_min + i, i2_min + j, cur::jx3) += QVx3 * Wx3[i][j]; } } diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 7d626c9d6..c35642d5f 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -313,7 +313,7 @@ namespace prtl_shape { if (i_init_min < i_fin_min) { i_min = i_init_min; - i_max = i_fin_min + O; + i_max = i_min + O + 1; #pragma unroll for (int j = 0; j < O + 1; j++) { @@ -329,7 +329,7 @@ namespace prtl_shape { } else if (i_init_min > i_fin_min) { i_min = i_fin_min; - i_max = i_init_min + O; + i_max = i_min + O + 1; iS[0] = ZERO; #pragma unroll From eae6a13513e64d0ad6fed9b23cc86188b38435f0 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 14 Aug 2025 17:57:28 -0500 Subject: [PATCH 059/154] add remaining shape_order cases --- src/engines/srpic.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index b63415a02..a99d33d67 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -549,6 +549,12 @@ namespace ntt { deposit_with<1u>(species, domain.mesh.metric, scatter_cur, dt); } else if (shape_order == 2) { deposit_with<2u>(species, domain.mesh.metric, scatter_cur, dt); + } else if (shape_order == 3) { + deposit_with<3u>(species, domain.mesh.metric, 
scatter_cur, dt); + } else if (shape_order == 4) { + deposit_with<4u>(species, domain.mesh.metric, scatter_cur, dt); + } else if (shape_order == 5) { + deposit_with<5u>(species, domain.mesh.metric, scatter_cur, dt); } else { raise::Error("Invalid shape order for current deposition", HERE); } From acfd1361d35b8e707d1619b7e50094521923365b Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 14 Aug 2025 17:57:43 -0500 Subject: [PATCH 060/154] bugfix for 3D deposit --- src/kernels/currents_deposit.hpp | 298 +++++-------------------------- 1 file changed, 43 insertions(+), 255 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index cb8862f3b..040503bcd 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -449,12 +449,12 @@ namespace kernel { #pragma unroll for (int j = 0; j < O + 2; ++j) { // Esirkepov 2001, Eq. 38 (simplified) - Wx1[i][j] = HALF * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] + iS_x2[j]); + Wx1[i][j] = HALF * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] + iS_x2[j]); - Wx2[i][j] = HALF * (fS_x1[i] + iS_x1[i]) * (fS_x2[j] - iS_x2[j]); + Wx2[i][j] = HALF * (fS_x1[i] + iS_x1[i]) * (fS_x2[j] - iS_x2[j]); - Wx3[i][j] = THIRD * (fS_x2[j] * (HALF * iS_x1[i] + fS_x1[i]) + - iS_x2[j] * (HALF * fS_x1[i] + iS_x1[i])); + Wx3[i][j] = THIRD * (fS_x2[j] * (HALF * iS_x1[i] + fS_x1[i]) + + iS_x2[j] * (HALF * fS_x1[i] + iS_x1[i])); } } @@ -643,7 +643,7 @@ namespace kernel { for (int i = 0; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { - jx2[i][j][0] = -Qdydt * Wx2[i][j][0]; + jx3[i][j][0] = -Qdydt * Wx3[i][j][0]; } } @@ -658,268 +658,56 @@ namespace kernel { } } + // account for ghost cells + i1_min += N_GHOSTS; + i2_min += N_GHOSTS; + i3_min += N_GHOSTS; + i1_max += N_GHOSTS; + i2_max += N_GHOSTS; + i3_max += N_GHOSTS; + + // get number of update indices for asymmetric movement + const int di_x1 = i1_max - i1_min; + const int di_x2 = i2_max - i2_min; + const int di_x3 = i3_max - i3_min; + /* 
Current update */ auto J_acc = J.access(); -#pragma unroll - for (int i = 0; i < O + 2; ++i) { -#pragma unroll - for (int j = 0; j < O + 2; ++j) { -#pragma unroll - for (int k = 1; k < O + 2; ++k) { - J_acc(i1_min + i, i2_min + j, i3_min, cur::jx1) += jx1[i][j][k]; - J_acc(i1_min + i, i2_min + j, i3_min, cur::jx2) += jx2[i][j][k]; - J_acc(i1_min + i, i2_min + j, i3_min, cur::jx3) += jx3[i][j][k]; + for (int i = 0; i < di_x1; ++i) { + for (int j = 0; j <= di_x2; ++j) { + for (int k = 0; k <= di_x3; ++k) { + J_acc(i1_min + i, i2_min + j, i3_min + k, cur::jx1) += jx1[i][j][k]; + } + } + } + + for (int i = 0; i <= di_x1; ++i) { + for (int j = 0; j < di_x2; ++j) { + for (int k = 0; k <= di_x3; ++k) { + J_acc(i1_min + i, i2_min + j, i3_min + k, cur::jx2) += jx2[i][j][k]; + } + } + } + + for (int i = 0; i <= di_x1; ++i) { + for (int j = 0; j <= di_x2; ++j) { + for (int k = 0; k < di_x3; ++k) { + J_acc(i1_min + i, i2_min + j, i3_min + k, cur::jx3) += jx3[i][j][k]; } } } } - } else { // order - raise::KernelError(HERE, "Unsupported interpolation order"); + } else { // order + raise::KernelError(HERE, "Unsupported interpolation order"); + } } - } - }; -} // namespace kernel + }; + } // namespace kernel #undef i_di_to_Xi -// -// } else if constexpr (O == 2u) { -// /* -// * Higher order charge conserving current deposition based on -// * Esirkepov (2001) https://ui.adsabs.harvard.edu/abs/2001CoPhC.135..144E/abstract -// **/ - -// // iS -> shape function for init position -// // fS -> shape function for final position - -// // shape function at integer points (one coeff is always ZERO) -// int i1_min; -// real_t iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3; -// real_t fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3; - -// // clang-format off -// prtl_shape::for_deposit_2nd(i1_prev(p), static_cast(dx1_prev(p)), -// i1(p), static_cast(dx1(p)), -// i1_min, -// iS_x1_0, iS_x1_1, iS_x1_2, iS_x1_3, -// fS_x1_0, fS_x1_1, fS_x1_2, fS_x1_3); -// // clang-format on - -// if constexpr (D == Dim::_1D) { -// 
raise::KernelNotImplementedError(HERE); -// } else if constexpr (D == Dim::_2D) { - -// // shape function at integer points (one coeff is always ZERO) -// int i2_min; -// real_t iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3; -// real_t fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3; - -// // clang-format off -// prtl_shape::for_deposit_2nd(i2_prev(p), static_cast(dx2_prev(p)), -// i2(p), static_cast(dx2(p)), -// i2_min, -// iS_x2_0, iS_x2_1, iS_x2_2, iS_x2_3, -// fS_x2_0, fS_x2_1, fS_x2_2, fS_x2_3); -// // clang-format on -// // x1-components -// const auto Wx1_00 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_0 + iS_x2_0); -// const auto Wx1_01 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_1 + iS_x2_1); -// const auto Wx1_02 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_2 + iS_x2_2); -// const auto Wx1_03 = HALF * (fS_x1_0 - iS_x1_0) * (fS_x2_3 + iS_x2_3); - -// const auto Wx1_10 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_0 + iS_x2_0); -// const auto Wx1_11 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_1 + iS_x2_1); -// const auto Wx1_12 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_2 + iS_x2_2); -// const auto Wx1_13 = HALF * (fS_x1_1 - iS_x1_1) * (fS_x2_3 + iS_x2_3); - -// const auto Wx1_20 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_0 + iS_x2_0); -// const auto Wx1_21 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_1 + iS_x2_1); -// const auto Wx1_22 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_2 + iS_x2_2); -// const auto Wx1_23 = HALF * (fS_x1_2 - iS_x1_2) * (fS_x2_3 + iS_x2_3); - -// const auto Wx1_30 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_0 + iS_x2_0); -// const auto Wx1_31 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_1 + iS_x2_1); -// const auto Wx1_32 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_2 + iS_x2_2); -// const auto Wx1_33 = HALF * (fS_x1_3 - iS_x1_3) * (fS_x2_3 + iS_x2_3); - -// // x2-components -// const auto Wx2_00 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_0 - iS_x2_0); -// const auto Wx2_01 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_1 - iS_x2_1); -// const auto Wx2_02 = HALF * (fS_x1_0 + iS_x1_0) * (fS_x2_2 - iS_x2_2); -// const auto Wx2_03 = HALF * 
(fS_x1_0 + iS_x1_0) * (fS_x2_3 - iS_x2_3); - -// const auto Wx2_10 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_0 - iS_x2_0); -// const auto Wx2_11 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_1 - iS_x2_1); -// const auto Wx2_12 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_2 - iS_x2_2); -// const auto Wx2_13 = HALF * (fS_x1_1 + iS_x1_1) * (fS_x2_3 - iS_x2_3); - -// const auto Wx2_20 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_0 - iS_x2_0); -// const auto Wx2_21 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_1 - iS_x2_1); -// const auto Wx2_22 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_2 - iS_x2_2); -// const auto Wx2_23 = HALF * (fS_x1_2 + iS_x1_2) * (fS_x2_3 - iS_x2_3); - -// const auto Wx2_30 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_0 - iS_x2_0); -// const auto Wx2_31 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_1 - iS_x2_1); -// const auto Wx2_32 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_2 - iS_x2_2); -// const auto Wx2_33 = HALF * (fS_x1_3 + iS_x1_3) * (fS_x2_3 - iS_x2_3); - -// // x3-components -// const auto Wx3_00 = THIRD * (fS_x2_0 * (HALF * iS_x1_0 + fS_x1_0) + -// iS_x2_0 * (HALF * fS_x1_0 + iS_x1_0)); -// const auto Wx3_01 = THIRD * (fS_x2_1 * (HALF * iS_x1_0 + fS_x1_0) + -// iS_x2_1 * (HALF * fS_x1_0 + iS_x1_0)); -// const auto Wx3_02 = THIRD * (fS_x2_2 * (HALF * iS_x1_0 + fS_x1_0) + -// iS_x2_2 * (HALF * fS_x1_0 + iS_x1_0)); -// const auto Wx3_03 = THIRD * (fS_x2_3 * (HALF * iS_x1_0 + fS_x1_0) + -// iS_x2_3 * (HALF * fS_x1_0 + iS_x1_0)); - -// const auto Wx3_10 = THIRD * (fS_x2_0 * (HALF * iS_x1_1 + fS_x1_1) + -// iS_x2_0 * (HALF * fS_x1_1 + iS_x1_1)); -// const auto Wx3_11 = THIRD * (fS_x2_1 * (HALF * iS_x1_1 + fS_x1_1) + -// iS_x2_1 * (HALF * fS_x1_1 + iS_x1_1)); -// const auto Wx3_12 = THIRD * (fS_x2_2 * (HALF * iS_x1_1 + fS_x1_1) + -// iS_x2_2 * (HALF * fS_x1_1 + iS_x1_1)); -// const auto Wx3_13 = THIRD * (fS_x2_3 * (HALF * iS_x1_1 + fS_x1_1) + -// iS_x2_3 * (HALF * fS_x1_1 + iS_x1_1)); - -// const auto Wx3_20 = THIRD * (fS_x2_0 * (HALF * iS_x1_2 + fS_x1_2) + -// iS_x2_0 * (HALF * fS_x1_2 + iS_x1_2)); 
-// const auto Wx3_21 = THIRD * (fS_x2_1 * (HALF * iS_x1_2 + fS_x1_2) + -// iS_x2_1 * (HALF * fS_x1_2 + iS_x1_2)); -// const auto Wx3_22 = THIRD * (fS_x2_2 * (HALF * iS_x1_2 + fS_x1_2) + -// iS_x2_2 * (HALF * fS_x1_2 + iS_x1_2)); -// const auto Wx3_23 = THIRD * (fS_x2_3 * (HALF * iS_x1_2 + fS_x1_2) + -// iS_x2_3 * (HALF * fS_x1_2 + iS_x1_2)); - -// const auto Wx3_30 = THIRD * (fS_x2_0 * (HALF * iS_x1_3 + fS_x1_3) + -// iS_x2_0 * (HALF * fS_x1_3 + iS_x1_3)); -// const auto Wx3_31 = THIRD * (fS_x2_1 * (HALF * iS_x1_3 + fS_x1_3) + -// iS_x2_1 * (HALF * fS_x1_3 + iS_x1_3)); -// const auto Wx3_32 = THIRD * (fS_x2_2 * (HALF * iS_x1_3 + fS_x1_3) + -// iS_x2_2 * (HALF * fS_x1_3 + iS_x1_3)); -// const auto Wx3_33 = THIRD * (fS_x2_3 * (HALF * iS_x1_3 + fS_x1_3) + -// iS_x2_3 * (HALF * fS_x1_3 + iS_x1_3)); - -// // x1-component -// const auto jx1_00 = Wx1_00; -// const auto jx1_10 = jx1_00 + Wx1_10; -// const auto jx1_20 = jx1_10 + Wx1_20; -// const auto jx1_30 = jx1_20 + Wx1_30; - -// const auto jx1_01 = Wx1_01; -// const auto jx1_11 = jx1_01 + Wx1_11; -// const auto jx1_21 = jx1_11 + Wx1_21; -// const auto jx1_31 = jx1_21 + Wx1_31; - -// const auto jx1_02 = Wx1_02; -// const auto jx1_12 = jx1_02 + Wx1_12; -// const auto jx1_22 = jx1_12 + Wx1_22; -// const auto jx1_32 = jx1_22 + Wx1_32; - -// const auto jx1_03 = Wx1_03; -// const auto jx1_13 = jx1_03 + Wx1_13; -// const auto jx1_23 = jx1_13 + Wx1_23; -// const auto jx1_33 = jx1_23 + Wx1_33; - -// // y-component -// const auto jx2_00 = Wx2_00; -// const auto jx2_01 = jx2_00 + Wx2_01; -// const auto jx2_02 = jx2_01 + Wx2_02; -// const auto jx2_03 = jx2_02 + Wx2_03; - -// const auto jx2_10 = Wx2_10; -// const auto jx2_11 = jx2_10 + Wx2_11; -// const auto jx2_12 = jx2_11 + Wx2_12; -// const auto jx2_13 = jx2_12 + Wx2_13; - -// const auto jx2_20 = Wx2_20; -// const auto jx2_21 = jx2_20 + Wx2_21; -// const auto jx2_22 = jx2_21 + Wx2_22; -// const auto jx2_23 = jx2_22 + Wx2_23; - -// const auto jx2_30 = Wx2_30; -// const auto 
jx2_31 = jx2_30 + Wx2_31; -// const auto jx2_32 = jx2_31 + Wx2_32; -// const auto jx2_33 = jx2_32 + Wx2_33; - -// i1_min += N_GHOSTS; -// i2_min += N_GHOSTS; - -// // @TODO: not sure about the signs here -// const real_t Qdx1dt = -coeff * inv_dt; -// const real_t Qdx2dt = -coeff * inv_dt; -// const real_t QVx3 = coeff * vp[2]; - -// auto J_acc = J.access(); - -// // x1-currents -// J_acc(i1_min + 0, i2_min + 0, cur::jx1) += Qdx1dt * jx1_00; -// J_acc(i1_min + 0, i2_min + 1, cur::jx1) += Qdx1dt * jx1_01; -// J_acc(i1_min + 0, i2_min + 2, cur::jx1) += Qdx1dt * jx1_02; -// J_acc(i1_min + 0, i2_min + 3, cur::jx1) += Qdx1dt * jx1_03; - -// J_acc(i1_min + 1, i2_min + 0, cur::jx1) += Qdx1dt * jx1_10; -// J_acc(i1_min + 1, i2_min + 1, cur::jx1) += Qdx1dt * jx1_11; -// J_acc(i1_min + 1, i2_min + 2, cur::jx1) += Qdx1dt * jx1_12; -// J_acc(i1_min + 1, i2_min + 3, cur::jx1) += Qdx1dt * jx1_13; - -// J_acc(i1_min + 2, i2_min + 0, cur::jx1) += Qdx1dt * jx1_20; -// J_acc(i1_min + 2, i2_min + 1, cur::jx1) += Qdx1dt * jx1_21; -// J_acc(i1_min + 2, i2_min + 2, cur::jx1) += Qdx1dt * jx1_22; -// J_acc(i1_min + 2, i2_min + 3, cur::jx1) += Qdx1dt * jx1_23; - -// J_acc(i1_min + 3, i2_min + 0, cur::jx1) += Qdx1dt * jx1_30; -// J_acc(i1_min + 3, i2_min + 1, cur::jx1) += Qdx1dt * jx1_31; -// J_acc(i1_min + 3, i2_min + 2, cur::jx1) += Qdx1dt * jx1_32; -// J_acc(i1_min + 3, i2_min + 3, cur::jx1) += Qdx1dt * jx1_33; - -// // x2-currents -// J_acc(i1_min + 0, i2_min + 0, cur::jx2) += Qdx2dt * jx2_00; -// J_acc(i1_min + 0, i2_min + 1, cur::jx2) += Qdx2dt * jx2_01; -// J_acc(i1_min + 0, i2_min + 2, cur::jx2) += Qdx2dt * jx2_02; -// J_acc(i1_min + 0, i2_min + 3, cur::jx2) += Qdx2dt * jx2_03; - -// J_acc(i1_min + 1, i2_min + 0, cur::jx2) += Qdx2dt * jx2_10; -// J_acc(i1_min + 1, i2_min + 1, cur::jx2) += Qdx2dt * jx2_11; -// J_acc(i1_min + 1, i2_min + 2, cur::jx2) += Qdx2dt * jx2_12; -// J_acc(i1_min + 1, i2_min + 3, cur::jx2) += Qdx2dt * jx2_13; - -// J_acc(i1_min + 2, i2_min + 0, cur::jx2) += 
Qdx2dt * jx2_20; -// J_acc(i1_min + 2, i2_min + 1, cur::jx2) += Qdx2dt * jx2_21; -// J_acc(i1_min + 2, i2_min + 2, cur::jx2) += Qdx2dt * jx2_22; -// J_acc(i1_min + 2, i2_min + 3, cur::jx2) += Qdx2dt * jx2_23; - -// J_acc(i1_min + 3, i2_min + 0, cur::jx2) += Qdx2dt * jx2_30; -// J_acc(i1_min + 3, i2_min + 1, cur::jx2) += Qdx2dt * jx2_31; -// J_acc(i1_min + 3, i2_min + 2, cur::jx2) += Qdx2dt * jx2_32; -// J_acc(i1_min + 3, i2_min + 3, cur::jx2) += Qdx2dt * jx2_33; - -// // x3-currents -// J_acc(i1_min + 0, i2_min + 0, cur::jx3) += QVx3 * Wx3_00; -// J_acc(i1_min + 0, i2_min + 1, cur::jx3) += QVx3 * Wx3_01; -// J_acc(i1_min + 0, i2_min + 2, cur::jx3) += QVx3 * Wx3_02; -// J_acc(i1_min + 0, i2_min + 3, cur::jx3) += QVx3 * Wx3_03; - -// J_acc(i1_min + 1, i2_min + 0, cur::jx3) += QVx3 * Wx3_10; -// J_acc(i1_min + 1, i2_min + 1, cur::jx3) += QVx3 * Wx3_11; -// J_acc(i1_min + 1, i2_min + 2, cur::jx3) += QVx3 * Wx3_12; -// J_acc(i1_min + 1, i2_min + 3, cur::jx3) += QVx3 * Wx3_13; - -// J_acc(i1_min + 2, i2_min + 0, cur::jx3) += QVx3 * Wx3_20; -// J_acc(i1_min + 2, i2_min + 1, cur::jx3) += QVx3 * Wx3_21; -// J_acc(i1_min + 2, i2_min + 2, cur::jx3) += QVx3 * Wx3_22; -// J_acc(i1_min + 2, i2_min + 3, cur::jx3) += QVx3 * Wx3_23; - -// J_acc(i1_min + 3, i2_min + 0, cur::jx3) += QVx3 * Wx3_30; -// J_acc(i1_min + 3, i2_min + 1, cur::jx3) += QVx3 * Wx3_31; -// J_acc(i1_min + 3, i2_min + 2, cur::jx3) += QVx3 * Wx3_32; -// J_acc(i1_min + 3, i2_min + 3, cur::jx3) += QVx3 * Wx3_33; - -// } else if constexpr (D == Dim::_3D) { -// raise::KernelNotImplementedError(HERE); -// } // dimension #endif // KERNELS_CURRENTS_DEPOSIT_HPP From bd72a7e0422ce9ef09d4e3c68e5295f71940a52a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Fri, 29 Aug 2025 14:55:39 -0500 Subject: [PATCH 061/154] bugfix in first order shape function --- src/kernels/particle_shapes.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kernels/particle_shapes.hpp 
b/src/kernels/particle_shapes.hpp index c35642d5f..10e2ddaac 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -33,8 +33,8 @@ namespace prtl_shape { S[1] = ONE - S[0]; } else { i_min = i; - S[1] = static_cast(1.5) - di; - S[0] = ONE - S[1]; + S[0] = static_cast(1.5) - di; + S[1] = ONE - S[0]; } } // staggered } else if constexpr (O == 2u) { From 6568008f0761be6ba3b284fffc0cd890cca4949c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Fri, 29 Aug 2025 14:56:01 -0500 Subject: [PATCH 062/154] cleanup --- src/kernels/particle_pusher_sr.hpp | 45 +++++++++++++----------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 873f488c0..bf4cfd2d6 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -478,12 +478,7 @@ namespace kernel::sr { bool is_gca { false }; // field interpolation 1st-6th order - //getInterpFlds(p, ei, bi); - - for (auto i { 0u }; i < 3u; ++i) { - ei[i] = ZERO; - bi[i] = ZERO; - } + getInterpFlds(p, ei, bi); metric.template transform_xyz(xp_Cd, ei, ei_Cart); metric.template transform_xyz(xp_Cd, bi, bi_Cart); @@ -840,7 +835,7 @@ namespace kernel::sr { vec_t& b0) const { // ToDo: implement template in srpic.hpp - const unsigned int O = 2u; + const unsigned int O = 1u; // ToDo: change to 1u! 
if constexpr (O == 0u) { @@ -1180,7 +1175,7 @@ namespace kernel::sr { // Ex1 -- dual, primal e0[0] = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c0 += S1d[idx1] * EB(id_min + idx1, jp_min + idx2, em::ex1); } @@ -1190,7 +1185,7 @@ namespace kernel::sr { // Ex2 -- primal, dual e0[1] = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c0 += S1p[idx1] * EB(ip_min + idx1, jd_min + idx2, em::ex2); } @@ -1200,7 +1195,7 @@ namespace kernel::sr { // Ex3 -- primal, primal e0[2] = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c0 += S1p[idx1] * EB(ip_min + idx1, jp_min + idx2, em::ex3); } @@ -1210,7 +1205,7 @@ namespace kernel::sr { // Bx1 -- primal, dual b0[0] = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c0 += S1p[idx1] * EB(ip_min + idx1, jd_min + idx2, em::bx1); } @@ -1220,7 +1215,7 @@ namespace kernel::sr { // Bx2 -- dual, primal b0[1] = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c0 += S1d[idx1] * EB(id_min + idx1, jp_min + idx2, em::bx2); } @@ -1230,7 +1225,7 @@ namespace kernel::sr { // Bx3 -- dual, dual b0[2] = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c0 += S1d[idx1] * EB(id_min + idx1, jd_min + idx2, em::bx3); } @@ -1268,9 +1263,9 @@ namespace kernel::sr { // Ex1 -- dual, primal, primal e0[0] = ZERO; for (int idx3 = 0; idx3 < O + 1; idx3++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c00 = 0.0; + real_t c00 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c00 += S1d[idx1] * EB(id_min + idx1, jp_min + idx2, 
kp_min + idx3, em::ex1); @@ -1283,9 +1278,9 @@ namespace kernel::sr { // Ex2 -- primal, dual, primal e0[1] = ZERO; for (int idx3 = 0; idx3 < O + 1; idx3++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c00 = 0.0; + real_t c00 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c00 += S1p[idx1] * EB(ip_min + idx1, jd_min + idx2, kp_min + idx3, em::ex2); @@ -1298,9 +1293,9 @@ namespace kernel::sr { // Ex3 -- primal, primal, dual e0[2] = ZERO; for (int idx3 = 0; idx3 < O + 1; idx3++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c00 = 0.0; + real_t c00 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c00 += S1p[idx1] * EB(ip_min + idx1, jp_min + idx2, kd_min + idx3, em::ex3); @@ -1313,9 +1308,9 @@ namespace kernel::sr { // Bx1 -- primal, dual, dual b0[0] = ZERO; for (int idx3 = 0; idx3 < O + 1; idx3++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c00 = 0.0; + real_t c00 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c00 += S1p[idx1] * EB(ip_min + idx1, jd_min + idx2, kd_min + idx3, em::bx1); @@ -1328,9 +1323,9 @@ namespace kernel::sr { // Bx2 -- dual, primal, dual b0[1] = ZERO; for (int idx3 = 0; idx3 < O + 1; idx3++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c00 = 0.0; + real_t c00 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c00 += S1d[idx1] * EB(id_min + idx1, jp_min + idx2, kd_min + idx3, em::bx2); @@ -1343,9 +1338,9 @@ namespace kernel::sr { // Bx3 -- dual, dual, primal b0[2] = ZERO; for (int idx3 = 0; idx3 < O + 1; idx3++) { - real_t c0 = 0.0; + real_t c0 = ZERO; for (int idx2 = 0; idx2 < O + 1; idx2++) { - real_t c00 = 0.0; + real_t c00 = ZERO; for (int idx1 = 0; idx1 < O + 1; idx1++) { c00 += S1d[idx1] * EB(id_min + idx1, jd_min + idx2, kp_min + idx3, em::bx3); From ffde338cff176188024f0f6f09441ca1ed7615ed Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Tue, 9 Sep 2025 16:48:56 -0500 Subject: [PATCH 063/154] bugfix in static_cast --- src/kernels/particle_shapes.hpp | 238 +++++++++++--------------------- 1 file changed, 81 insertions(+), 157 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 10e2ddaac..0ad83b109 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -49,15 +49,15 @@ namespace prtl_shape { S[2] = ONE - S[0] - S[1]; } else { i_min = i; - S[0] = HALF * SQR(static_cast(3 / 2) - di); - S[2] = HALF * SQR(di - HALF); - S[1] = ONE - S[0] - S[2]; + S[0] = HALF * SQR(static_cast(3.0 / 2.0) - di); + S[1] = THREE_FOURTHS - SQR(ONE - di); + S[2] = ONE - S[0] - S[1]; } } else { // compute at i + 1/2 positions i_min = i - 1; - S[1] = THREE_FOURTHS - SQR(di - HALF); - S[2] = HALF * SQR(di); - S[0] = ONE - S[1] - S[2]; + S[0] = HALF * SQR(ONE - di); + S[2] = HALF * SQR(di); + S[1] = ONE - S[0] - S[2]; } // staggered } else if constexpr (O == 3u) { // 1/6 * ( 4 - 6 * |x|^2 + 3 * |x|^2) |x| < 1 @@ -65,24 +65,24 @@ namespace prtl_shape { // 0.0 |x| ≥ 2 if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; - S[0] = static_cast(1 / 6) * CUBE(ONE - di); - S[3] = static_cast(1 / 6) * CUBE(di); - S[1] = static_cast(1 / 6) * + S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); + S[3] = static_cast(1.0 / 6.0) * CUBE(di); + S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(di) + THREE * CUBE(di)); S[2] = ONE - S[0] - S[1] - S[3]; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 2; - S[0] = static_cast(1 / 6) * CUBE(HALF - di); - S[3] = static_cast(1 / 6) * CUBE(HALF + di); - S[1] = static_cast(1 / 6) * + S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); + S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); + S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(HALF - di) + THREE * CUBE(HALF - di)); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; - S[0] = static_cast(1 / 6) * 
CUBE(HALF + di); - S[3] = static_cast(1 / 6) * CUBE(HALF + di); - S[1] = static_cast(1 / 6) * + S[0] = static_cast(1.0 / 6.0) * CUBE(HALF + di); + S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); + S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(di - HALF) + THREE * CUBE(di - HALF)); S[2] = ONE - S[0] - S[1] - S[3]; } @@ -94,37 +94,37 @@ namespace prtl_shape { if constexpr (not STAGGERED) { // compute at i positions if (di < HALF) { i_min = i - 2; - S[0] = static_cast(1 / 25) * SQR(SQR(HALF - di)); - S[4] = static_cast(1 / 25) * SQR(SQR(HALF + di)); - S[1] = static_cast(5 / 8) - SQR(ONE + di) + - static_cast(32 / 45) * CUBE(ONE + di) - - static_cast(98 / 675) * SQR(SQR(ONE + di)); - S[2] = static_cast(5 / 8) - SQR(di) + - static_cast(32 / 45) * CUBE(di) - - static_cast(98 / 675) * SQR(SQR(di)); + S[0] = static_cast(1.0 / 25.0) * SQR(SQR(HALF - di)); + S[4] = static_cast(1.0 / 25.0) * SQR(SQR(HALF + di)); + S[1] = static_cast(5.0 / 8.0) - SQR(ONE + di) + + static_cast(32.0 / 45.0) * CUBE(ONE + di) - + static_cast(98.0 / 675.0) * SQR(SQR(ONE + di)); + S[2] = static_cast(5.0 / 8.0) - SQR(di) + + static_cast(32.0 / 45.0) * CUBE(di) - + static_cast(98.0 / 675.0) * SQR(SQR(di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } else { i_min = i - 1; - S[0] = static_cast(1 / 25) * SQR(SQR(THREE * HALF - di)); - S[4] = static_cast(1 / 25) * SQR(SQR(di - HALF)); - S[1] = static_cast(5 / 8) - SQR(di) + - static_cast(32 / 45) * CUBE(di) - - static_cast(98 / 675) * SQR(SQR(di)); - S[2] = static_cast(5 / 8) - SQR(ONE - di) + - static_cast(32 / 45) * CUBE(ONE - di) - - static_cast(98 / 675) * SQR(SQR(ONE - di)); + S[0] = static_cast(1.0 / 25.0) * SQR(SQR(THREE * HALF - di)); + S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di - HALF)); + S[1] = static_cast(5.0 / 8.0) - SQR(di) + + static_cast(32.0 / 45.0) * CUBE(di) - + static_cast(98.0 / 675.0) * SQR(SQR(di)); + S[2] = static_cast(5.0 / 8.0) - SQR(ONE - di) + + static_cast(32.0 / 45.0) * CUBE(ONE - di) - + static_cast(98.0 / 675.0) * 
SQR(SQR(ONE - di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } } else { // compute at i + 1/2 positions i_min = i - 2; - S[0] = static_cast(1 / 25) * SQR(SQR(ONE - di)); - S[4] = static_cast(1 / 25) * SQR(SQR(di)); - S[1] = static_cast(5 / 8) - SQR(HALF + di) + - static_cast(32 / 45) * CUBE(HALF + di) - - static_cast(98 / 675) * SQR(SQR(HALF + di)); - S[2] = static_cast(5 / 8) - SQR(HALF - di) + - static_cast(32 / 45) * CUBE(HALF - di) - - static_cast(98 / 675) * SQR(SQR(HALF - di)); + S[0] = static_cast(1.0 / 25.0) * SQR(SQR(ONE - di)); + S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di)); + S[1] = static_cast(5.0 / 8.0) - SQR(HALF + di) + + static_cast(32.0 / 45.0) * CUBE(HALF + di) - + static_cast(98.0 / 675.0) * SQR(SQR(HALF + di)); + S[2] = static_cast(5.0 / 8.0) - SQR(HALF - di) + + static_cast(32.0 / 45.0) * CUBE(HALF - di) - + static_cast(98.0 / 675.0) * SQR(SQR(HALF - di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } // staggered } else if constexpr (O == 5u) { @@ -133,146 +133,70 @@ namespace prtl_shape { // 0.0 |x| ≥ 3 if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; - S[0] = static_cast(1 / 135) * SQR(CUBE(ONE - di)); // - S[1] = static_cast(3 / 5) - SQR(ONE + di) + - static_cast(5 / 6) * CUBE(ONE + di) - - static_cast(19 / 72) * SQR(SQR(ONE + di)) + - static_cast(13 / 432) * SQR(CUBE(ONE + di)); - S[2] = static_cast(3 / 5) - SQR(di) + - static_cast(5 / 6) * CUBE(di) - - static_cast(19 / 72) * SQR(SQR(di)) + - static_cast(13 / 432) * SQR(CUBE(di)); - S[3] = static_cast(3 / 5) - SQR(ONE - di) + - static_cast(5 / 6) * CUBE(ONE - di) - - static_cast(19 / 72) * SQR(SQR(ONE - di)) + - static_cast(13 / 432) * SQR(CUBE(ONE - di)); - S[5] = static_cast(1 / 135) * SQR(CUBE(di)); + S[0] = static_cast(1.0 / 135.0) * SQR(CUBE(ONE - di)); // + S[1] = static_cast(3.0 / 5.0) - SQR(ONE + di) + + static_cast(5.0 / 6.0) * CUBE(ONE + di) - + static_cast(19.0 / 72.0) * SQR(SQR(ONE + di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(ONE + di)); + S[2] = 
static_cast(3.0 / 5.0) - SQR(di) + + static_cast(5.0 / 6.0) * CUBE(di) - + static_cast(19.0 / 72.0) * SQR(SQR(di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(di)); + S[3] = static_cast(3.0 / 5.0) - SQR(ONE - di) + + static_cast(5.0 / 6.0) * CUBE(ONE - di) - + static_cast(19.0 / 72.0) * SQR(SQR(ONE - di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(ONE - di)); + S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 3; - S[0] = static_cast(1 / 135) * SQR(CUBE(HALF - di)); - S[1] = static_cast(3 / 5) - + S[0] = static_cast(1.0 / 135.0) * SQR(CUBE(HALF - di)); + S[1] = static_cast(3.0 / 5.0) - SQR(static_cast(3 / 2) + di) + - static_cast(5 / 6) * + static_cast(5.0 / 6.0) * CUBE(static_cast(3 / 2) + di) - - static_cast(19 / 72) * + static_cast(19.0 / 72.0) * SQR(SQR(static_cast(3 / 2) + di)) + - static_cast(13 / 432) * + static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(3 / 2) + di)); - S[2] = static_cast(3 / 5) - SQR(HALF + di) + - static_cast(5 / 6) * CUBE(HALF + di) - - static_cast(19 / 72) * SQR(SQR(HALF + di)) + - static_cast(13 / 432) * SQR(CUBE(HALF + di)); - S[3] = static_cast(3 / 5) - SQR(HALF - di) + - static_cast(5 / 6) * CUBE(HALF - di) - - static_cast(19 / 72) * SQR(SQR(HALF - di)) + - static_cast(13 / 432) * SQR(CUBE(HALF - di)); - S[5] = static_cast(1 / 135) * SQR(CUBE(HALF + di)); + S[2] = static_cast(3.0 / 5.0) - SQR(HALF + di) + + static_cast(5.0 / 6.0) * CUBE(HALF + di) - + static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); + S[3] = static_cast(3.0 / 5.0) - SQR(HALF - di) + + static_cast(5.0 / 6.0) * CUBE(HALF - di) - + static_cast(19.0 / 72.0) * SQR(SQR(HALF - di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(HALF - di)); + S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(HALF + di)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } else { i_min = i - 2; - S[0] = static_cast(1 / 135) * + S[0] = static_cast(1.0 / 135.0) * 
SQR(CUBE(static_cast(3 / 2) - di)); - S[1] = static_cast(3 / 5) - SQR(HALF + di) + - static_cast(5 / 6) * CUBE(HALF + di) - - static_cast(19 / 72) * SQR(SQR(HALF + di)) + - static_cast(13 / 432) * SQR(CUBE(HALF + di)); - S[2] = static_cast(3 / 5) - SQR(di - HALF) + - static_cast(5 / 6) * CUBE(di - HALF) - - static_cast(19 / 72) * SQR(SQR(di - HALF)) + - static_cast(13 / 432) * SQR(CUBE(di - HALF)); - S[3] = static_cast(3 / 5) - + S[1] = static_cast(3.0 / 5.0) - SQR(HALF + di) + + static_cast(5.0 / 6.0) * CUBE(HALF + di) - + static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); + S[2] = static_cast(3.0 / 5.0) - SQR(di - HALF) + + static_cast(5.0 / 6.0) * CUBE(di - HALF) - + static_cast(19.0 / 72.0) * SQR(SQR(di - HALF)) + + static_cast(13.0 / 432.0) * SQR(CUBE(di - HALF)); + S[3] = static_cast(3.0 / 5.0) - SQR(static_cast(3 / 2) - di) + - static_cast(5 / 6) * + static_cast(5.0 / 6.0) * CUBE(static_cast(3 / 2) - di) - - static_cast(19 / 72) * + static_cast(19.0 / 72.0) * SQR(SQR(static_cast(3 / 2) - di)) + - static_cast(13 / 432) * + static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(3 / 2) - di)); - S[5] = static_cast(1 / 135) * SQR(CUBE(di - HALF)); + S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(di - HALF)); S[3] = ONE - S[0] - S[1] - S[2] - S[4]; } } // staggered } } - Inline void for_deposit_2nd(const int& i_init, - const real_t& di_init, - const int& i_fin, - const real_t& di_fin, - int& i_min, - real_t& iS_0, - real_t& iS_1, - real_t& iS_2, - real_t& iS_3, - real_t& fS_0, - real_t& fS_1, - real_t& fS_2, - real_t& fS_3) { - - /* - The second order shape function per particle is a 4 element array - where the shape function contributes to only 3 elements. 
- We need to find which indices are contributing to the shape function - For this we first compute the indices of the particle position - - Let * be the particle position at the current timestep - Let x be the particle position at the previous timestep - - - 0 1 2 3 - ____________________________ - | x* | x* | x* | | // i_init_min = i_fin_min - |______|______|______|______| - | x | x* | x* | * | // i_init_min < i_fin_min - |______|______|______|______| - | * | x* | x* | x | // i_init_min > i_fin_min - |______|______|______|______| - */ - - int i_init_min, i_fin_min; - - real_t iS_[3], fS_[3]; - - order(i_init, di_init, i_init_min, iS_); - order(i_fin, di_fin, i_fin_min, fS_); - - if (i_init_min < i_fin_min) { - i_min = i_init_min; - iS_0 = iS_[0]; - iS_1 = iS_[1]; - iS_2 = iS_[2]; - iS_3 = ZERO; - - fS_0 = ZERO; - fS_1 = iS_[0]; - fS_2 = iS_[1]; - fS_3 = iS_[2]; - } else if (i_init_min > i_fin_min) { - i_min = i_fin_min; - iS_0 = ZERO; - iS_1 = iS_[0]; - iS_2 = iS_[1]; - iS_3 = iS_[2]; - - fS_0 = iS_[0]; - fS_1 = iS_[1]; - fS_2 = iS_[2]; - fS_3 = ZERO; - } else { - i_min = i_init_min; - iS_0 = iS_[0]; - iS_1 = iS_[1]; - iS_2 = iS_[2]; - iS_3 = ZERO; - - fS_0 = iS_[0]; - fS_1 = iS_[1]; - fS_2 = iS_[2]; - fS_3 = ZERO; - } - } template Inline void for_deposit(const int& i_init, From dbb6b42d5f192d911c91b9fda6d1321f5cc37dea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 10 Sep 2025 17:28:08 -0500 Subject: [PATCH 064/154] bugfix in 3rd order shape function --- src/kernels/particle_shapes.hpp | 143 ++++++++++++++++---------------- 1 file changed, 70 insertions(+), 73 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 0ad83b109..d0e318f1d 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -60,72 +60,75 @@ namespace prtl_shape { S[1] = ONE - S[0] - S[2]; } // staggered } else if constexpr (O == 3u) { - // 1/6 * ( 4 - 6 * |x|^2 + 3 * |x|^2) |x| < 1 + // 1/6 * ( 4 - 6 
* |x|^2 + 3 * |x|^3) |x| < 1 // S(x) = 1/6 * ( 2 - |x|)^3 1 ≤ |x| < 2 // 0.0 |x| ≥ 2 if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); - S[3] = static_cast(1.0 / 6.0) * CUBE(di); S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(di) + THREE * CUBE(di)); + S[3] = static_cast(1.0 / 6.0) * CUBE(di); S[2] = ONE - S[0] - S[1] - S[3]; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 2; S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); - S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); S[1] = static_cast(1.0 / 6.0) * - (FOUR - SIX * SQR(HALF - di) + THREE * CUBE(HALF - di)); + (FOUR - SIX * SQR(HALF + di) + THREE * CUBE(HALF + di)); + S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; - S[0] = static_cast(1.0 / 6.0) * CUBE(HALF + di); - S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); + S[0] = static_cast(1.0 / 6.0) * CUBE(static_cast(1.5) - di); S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(di - HALF) + THREE * CUBE(di - HALF)); + S[3] = static_cast(1.0 / 6.0) * CUBE(di - HALF); S[2] = ONE - S[0] - S[1] - S[3]; } } // staggered } else if constexpr (O == 4u) { - // 1/25 * ( 5/2 - |x|)^4 |x| < 3/2 - // S(x) = 5/8 - |x|^2 + 32/45 * |x|^3 - 98/675 * |x|^4 3/2 ≤ |x| < 5/2 + // 5/8 - |x|^2 + 32/45 * |x|^3 - 98/675 * |x|^4 |x| < 3/2 + // S(x) = 1/25 * ( 5/2 - |x|)^4 3/2 ≤ |x| < 5/2 // 0.0 |x| ≥ 5/2 if constexpr (not STAGGERED) { // compute at i positions if (di < HALF) { i_min = i - 2; S[0] = static_cast(1.0 / 25.0) * SQR(SQR(HALF - di)); - S[4] = static_cast(1.0 / 25.0) * SQR(SQR(HALF + di)); S[1] = static_cast(5.0 / 8.0) - SQR(ONE + di) + static_cast(32.0 / 45.0) * CUBE(ONE + di) - static_cast(98.0 / 675.0) * SQR(SQR(ONE + di)); - S[2] = static_cast(5.0 / 8.0) - SQR(di) + + S[2] = static_cast(5.0 / 8.0) - SQR(di) + static_cast(32.0 / 45.0) * CUBE(di) - static_cast(98.0 / 675.0) * SQR(SQR(di)); - S[3] = ONE - S[0] - S[1] - S[2] - 
S[4]; + S[3] = static_cast(5.0 / 8.0) - SQR(ONE - di) + + static_cast(32.0 / 45.0) * CUBE(ONE - di) - + static_cast(98.0 / 675.0) * SQR(SQR(ONE - di)); + S[4] = static_cast(1.0 / 25.0) * SQR(SQR(HALF + di)); + S[2] = ONE - S[0] - S[1] - S[3] - S[4]; } else { i_min = i - 1; - S[0] = static_cast(1.0 / 25.0) * SQR(SQR(THREE * HALF - di)); - S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di - HALF)); + S[0] = static_cast(1.0 / 25.0) * SQR(SQR(static_cast(1.5) - di)); S[1] = static_cast(5.0 / 8.0) - SQR(di) + static_cast(32.0 / 45.0) * CUBE(di) - static_cast(98.0 / 675.0) * SQR(SQR(di)); - S[2] = static_cast(5.0 / 8.0) - SQR(ONE - di) + - static_cast(32.0 / 45.0) * CUBE(ONE - di) - - static_cast(98.0 / 675.0) * SQR(SQR(ONE - di)); - S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + S[3] = static_cast(5.0 / 8.0) - SQR(TWO - di) + + static_cast(32.0 / 45.0) * CUBE(TWO - di) - + static_cast(98.0 / 675.0) * SQR(SQR(TWO - di)); + S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di - HALF)); + S[2] = ONE - S[0] - S[1] - S[3] - S[4]; } } else { // compute at i + 1/2 positions - i_min = i - 2; - S[0] = static_cast(1.0 / 25.0) * SQR(SQR(ONE - di)); - S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di)); - S[1] = static_cast(5.0 / 8.0) - SQR(HALF + di) + - static_cast(32.0 / 45.0) * CUBE(HALF + di) - - static_cast(98.0 / 675.0) * SQR(SQR(HALF + di)); - S[2] = static_cast(5.0 / 8.0) - SQR(HALF - di) + - static_cast(32.0 / 45.0) * CUBE(HALF - di) - - static_cast(98.0 / 675.0) * SQR(SQR(HALF - di)); - S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + i_min = i - 2; + S[0] = static_cast(1.0 / 25.0) * SQR(SQR(ONE - di)); + S[1] = static_cast(5.0 / 8.0) - SQR(HALF + di) + + static_cast(32.0 / 45.0) * CUBE(HALF + di) - + static_cast(98.0 / 675.0) * SQR(SQR(HALF + di)); + S[3] = static_cast(5.0 / 8.0) - SQR(TWO - di) + + static_cast(32.0 / 45.0) * CUBE(TWO - di) - + static_cast(98.0 / 675.0) * SQR(SQR(TWO - di)); + S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di)); + S[2] = ONE - S[0] - S[1] - S[3] - S[4]; } // staggered } else 
if constexpr (O == 5u) { // 3/5 - |x|^2 + 5/6 * |x|^3 - 19/72 * |x|^4 + 13/432 * |x|^5 |x| < 2 @@ -133,65 +136,59 @@ namespace prtl_shape { // 0.0 |x| ≥ 3 if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; - S[0] = static_cast(1.0 / 135.0) * SQR(CUBE(ONE - di)); // + S[0] = static_cast(1.0 / 135.0) * SQR(SQR(ONE + di))*(ONE - di); S[1] = static_cast(3.0 / 5.0) - SQR(ONE + di) + static_cast(5.0 / 6.0) * CUBE(ONE + di) - static_cast(19.0 / 72.0) * SQR(SQR(ONE + di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(ONE + di)); - S[2] = static_cast(3.0 / 5.0) - SQR(di) + + static_cast(13.0 / 432.0) * SQR(SQR(ONE + di))*(ONE + di); + S[2] = static_cast(3.0 / 5.0) - SQR(di) + static_cast(5.0 / 6.0) * CUBE(di) - static_cast(19.0 / 72.0) * SQR(SQR(di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(di)); - S[3] = static_cast(3.0 / 5.0) - SQR(ONE - di) + + static_cast(13.0 / 432.0) * SQR(SQR(di)) * di; + S[3] = static_cast(3.0 / 5.0) - SQR(ONE - di) + static_cast(5.0 / 6.0) * CUBE(ONE - di) - static_cast(19.0 / 72.0) * SQR(SQR(ONE - di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(ONE - di)); - S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(di)); - S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + static_cast(13.0 / 432.0) * SQR(SQR(ONE - di))*(ONE - di); + S[4] = static_cast(3.0 / 5.0) - SQR(TWO - di) + + static_cast(5.0 / 6.0) * CUBE(TWO - di) - + static_cast(19.0 / 72.0) * SQR(SQR(TWO - di)) + + static_cast(13.0 / 432.0) * SQR(SQR(TWO - di))*(TWO - di); + S[5] = static_cast(1.0 / 135.0) * SQR(SQR(di))*di; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 3; S[0] = static_cast(1.0 / 135.0) * SQR(CUBE(HALF - di)); - S[1] = static_cast(3.0 / 5.0) - - SQR(static_cast(3 / 2) + di) + - static_cast(5.0 / 6.0) * - CUBE(static_cast(3 / 2) + di) - - static_cast(19.0 / 72.0) * - SQR(SQR(static_cast(3 / 2) + di)) + - static_cast(13.0 / 432.0) * - SQR(CUBE(static_cast(3 / 2) + di)); - S[2] = static_cast(3.0 / 5.0) - SQR(HALF + di) + - static_cast(5.0 / 6.0) * CUBE(HALF + 
di) - - static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); - S[3] = static_cast(3.0 / 5.0) - SQR(HALF - di) + - static_cast(5.0 / 6.0) * CUBE(HALF - di) - - static_cast(19.0 / 72.0) * SQR(SQR(HALF - di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(HALF - di)); - S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(HALF + di)); - S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + S[1] = static_cast(3.0 / 5.0) - SQR(static_cast(1.5) + di) + + static_cast(5.0 / 6.0) * CUBE(static_cast(1.5) + di) - + static_cast(19.0 / 72.0) * SQR(SQR(static_cast(1.5) + di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(1.5) + di)); + S[2] = static_cast(3.0 / 5.0) - SQR(HALF + di) + + static_cast(5.0 / 6.0) * CUBE(HALF + di) - + static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); + S[4] = static_cast(3.0 / 5.0) - SQR(static_cast(1.5) - di) + + static_cast(5.0 / 6.0) * CUBE(static_cast(1.5) - di) - + static_cast(19.0 / 72.0) * SQR(SQR(static_cast(1.5) - di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(1.5) - di)); + S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(HALF + di)); + S[3] = ONE - S[0] - S[1] - S[2] - S[4] - S[5]; } else { i_min = i - 2; - S[0] = static_cast(1.0 / 135.0) * - SQR(CUBE(static_cast(3 / 2) - di)); - S[1] = static_cast(3.0 / 5.0) - SQR(HALF + di) + - static_cast(5.0 / 6.0) * CUBE(HALF + di) - - static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); - S[2] = static_cast(3.0 / 5.0) - SQR(di - HALF) + - static_cast(5.0 / 6.0) * CUBE(di - HALF) - - static_cast(19.0 / 72.0) * SQR(SQR(di - HALF)) + - static_cast(13.0 / 432.0) * SQR(CUBE(di - HALF)); - S[3] = static_cast(3.0 / 5.0) - - SQR(static_cast(3 / 2) - di) + - static_cast(5.0 / 6.0) * - CUBE(static_cast(3 / 2) - di) - - static_cast(19.0 / 72.0) * - SQR(SQR(static_cast(3 / 2) - di)) + - static_cast(13.0 / 432.0) * - SQR(CUBE(static_cast(3 / 2) - di)); - S[5] = static_cast(1.0 / 
135.0) * SQR(CUBE(di - HALF)); - S[3] = ONE - S[0] - S[1] - S[2] - S[4]; + S[0] = static_cast(1.0 / 135.0) * SQR(CUBE(static_cast(1.5) - di)); + S[1] = static_cast(3.0 / 5.0) - SQR(HALF + di) + + static_cast(5.0 / 6.0) * CUBE(HALF + di) - + static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); + S[2] = static_cast(3.0 / 5.0) - SQR(di - HALF) + + static_cast(5.0 / 6.0) * CUBE(di - HALF) - + static_cast(19.0 / 72.0) * SQR(SQR(di - HALF)) + + static_cast(13.0 / 432.0) * SQR(CUBE(di - HALF)); + S[4] = static_cast(3.0 / 5.0) - SQR(static_cast(2.5) - di) + + static_cast(5.0 / 6.0) * CUBE(static_cast(2.5) - di) - + static_cast(19.0 / 72.0) * SQR(SQR(static_cast(2.5) - di)) + + static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(2.5) - di)); + S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(di - HALF)); + S[3] = ONE - S[0] - S[1] - S[2] - S[4] - S[5]; } } // staggered } From ba443748baccaf484c36c3fe1ce727399d0179b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 10 Sep 2025 23:01:35 -0500 Subject: [PATCH 065/154] shape function up-to 8th order --- src/engines/srpic.hpp | 6 + src/kernels/currents_deposit.hpp | 2 +- src/kernels/particle_shapes.hpp | 349 ++++++++++++++++++++++++------- 3 files changed, 278 insertions(+), 79 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index a99d33d67..9fc4b7fc4 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -555,6 +555,12 @@ namespace ntt { deposit_with<4u>(species, domain.mesh.metric, scatter_cur, dt); } else if (shape_order == 5) { deposit_with<5u>(species, domain.mesh.metric, scatter_cur, dt); + } else if (shape_order == 6) { + deposit_with<6u>(species, domain.mesh.metric, scatter_cur, dt); + } else if (shape_order == 7) { + deposit_with<7u>(species, domain.mesh.metric, scatter_cur, dt); + } else if (shape_order == 8) { + deposit_with<8u>(species, domain.mesh.metric, scatter_cur, dt); } else { raise::Error("Invalid shape order for 
current deposition", HERE); } diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 040503bcd..c0991650a 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -401,7 +401,7 @@ namespace kernel { cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; } } - } else if constexpr ((O >= 1u) and (O <= 5u)) { + } else if constexpr ((O >= 1u) and (O <= 8u)) { // shape function in dim1 -> always required real_t iS_x1[O + 2], fS_x1[O + 2]; diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index d0e318f1d..2ebf87ac0 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -17,6 +17,172 @@ namespace prtl_shape { + Inline real_t S4(real_t x) + { + if (x < HALF) + { + return static_cast(115.0 / 192.0) - static_cast(5.0 / 8.0) * SQR(x) + + INV_4 * SQR(SQR(x)); + } else if (x < static_cast(1.5)) + { + return static_cast(55.0/96.0) + static_cast(5.0/24.0) * x - + static_cast(5.0 / 4.0) * SQR(x) + static_cast(5.0 / 6.0) * CUBE(x) - + static_cast(1.0 / 6.0) * SQR(SQR(x)); + } else if (x < static_cast(2.5)) + { + return static_cast(625.0/384.0) - static_cast(125.0/48.0) * x + + static_cast(25.0 / 16.0) * SQR(x) - static_cast(5.0 / 12.0) * CUBE(x) + + static_cast(1.0 / 24.0) * SQR(SQR(x)); + } else { + return ZERO; + } + } + + Inline real_t S5(real_t x) + { + if (x <= ONE) + { + return static_cast(11.0 / 20.0) - HALF * SQR(x) + + INV_4 * SQR(SQR(x)) - static_cast(1.0 / 12.0) * CUBE(x) * SQR(x); + } else if (x < TWO) + { + return static_cast(17.0/40.0) + FIVE * INV_8 * x - + static_cast(7.0) * INV_4 * SQR(x) + FIVE * INV_4 * CUBE(x) - + THREE * INV_8 * SQR(SQR(x)) + static_cast(1.0 / 24.0) * CUBE(x) * SQR(x); + } else if (x < THREE) + { + return static_cast(81.0/40.0) - static_cast(27.0/8.0) * x + + static_cast(9.0) * INV_4 * SQR(x) - THREE_FOURTHS * CUBE(x) + + INV_8 * SQR(SQR(x)) - static_cast(1.0 / 120.0) * CUBE(x) * SQR(x); + } else { + return ZERO; + } + } + + Inline real_t 
S6(real_t x) + { + if (x <= HALF) { + return static_cast(5887.0 / 11520.0) - static_cast(77.0 / 192.0) * SQR(x) + + static_cast(7.0 / 48.0) * SQR(SQR(x)) - static_cast(1.0 / 36.0) * SQR(SQR(x)) * SQR(x); + } else if (x < static_cast(1.5)) { + return static_cast(7861.0/15360.0) - static_cast(7.0/768.0) * x - + static_cast(91.0/256.0) * SQR(x) - static_cast(35.0/288.0) * CUBE(x) + + static_cast(21.0/64.0) * SQR(SQR(x)) - static_cast(7.0 / 48.0) * CUBE(x) * SQR(x) + + static_cast(1.0 / 48.0) * SQR(SQR(x)) * SQR(x); + } else if (x < static_cast(2.5)) { + return static_cast(1379.0/7680.0) + + static_cast(1267.0/960.0) * x - + static_cast(329.0/128.0) * SQR(x) + + static_cast(133.0/72.0) * CUBE(x) - + static_cast(21.0/32.0) * SQR(SQR(x)) + + static_cast(7.0 / 60.0) * CUBE(x) * SQR(x) - + static_cast(1.0 / 120.0) * SQR(SQR(x)) * SQR(x); + } else if (x < static_cast(3.5)) { + return static_cast(117649.0/46080.0) - + static_cast(16807.0/3840.0) * x + + static_cast(2401.0/768.0) * SQR(x) - + static_cast(343.0/288.0) * CUBE(x) + + static_cast(49.0/192.0) * SQR(SQR(x)) - + static_cast(7.0 / 240.0) * CUBE(x) * SQR(x) + + static_cast(1.0 / 720.0) * SQR(SQR(x)) * SQR(x); + } else { + return ZERO; + } + } + + Inline real_t S7(real_t x) + { + if (x < ONE) { + return static_cast(151.0) / static_cast(315.0) - + THIRD * SQR(x) + + static_cast(1.0) / static_cast(9.0) * SQR(SQR(x)) - + static_cast(1.0) / static_cast(36.0) * SQR(SQR(x)) * SQR(x) + + static_cast(1.0) / static_cast(144.0) * SQR(SQR(x)) * CUBE(x); + } else if (x <= TWO) { + return static_cast(103.0)/static_cast(210.0) - + static_cast(7.0)/static_cast(90.0) * x - + static_cast(1.0)/static_cast(10.0) * SQR(x) - + static_cast(7.0)/static_cast(18.0) * CUBE(x) + + HALF * SQR(SQR(x)) - + static_cast(7.0) / static_cast(30.0) * CUBE(x) * SQR(x) + + static_cast(1.0) / static_cast(20.0) * SQR(SQR(x)) * SQR(x) - + static_cast(1.0) / static_cast(270.0) * SQR(SQR(x)) * CUBE(x); + } else if (x < THREE) { + return 
static_cast(217.0)/static_cast(90.0) * x - + static_cast(23.0)/static_cast(6.0) * SQR(x) + + static_cast(49.0)/static_cast(18.0) * CUBE(x) - + static_cast(19.0)/static_cast(18.0) * SQR(SQR(x)) + + static_cast(7.0)/static_cast(30.0) * CUBE(x) * SQR(x) - + static_cast(1.0)/static_cast(36.0) * SQR(SQR(x)) * SQR(x) + + static_cast(1.0)/static_cast(720.0) * SQR(SQR(x)) * CUBE(x) - + static_cast(139.0)/static_cast(630.0); + } else if (x < FOUR) { + return static_cast(1024.0)/static_cast(315.0) - + static_cast(256.0)/static_cast(45.0) * x + + static_cast(64.0)/static_cast(15.0) * SQR(x) - + static_cast(16.0)/static_cast(9.0) * CUBE(x) + + static_cast(4.0)/static_cast(9.0) * SQR(SQR(x)) - + static_cast(1.0)/static_cast(15.0) * CUBE(x) * SQR(x) + + static_cast(1.0)/static_cast(180.0) * SQR(SQR(x)) * SQR(x) - + static_cast(1.0)/static_cast(5040.0) * SQR(SQR(x)) * CUBE(x); + } else { + return ZERO; + } + } + + Inline real_t S8(real_t x) + { + if (x < HALF) { + return static_cast(259723.0 / 573440.0) - + static_cast(289.0 / 1024.0) * SQR(x) + + static_cast(43.0 / 512.0) * SQR(SQR(x)) - + static_cast(1.0 / 64.0) * SQR(SQR(x)) * SQR(x) + + static_cast(1.0 / 576.0) * SQR(SQR(SQR(x))); + } else if (x <= static_cast(1.5)) { + return static_cast(64929.0/143360.0) + + static_cast(1.0/5120.0) * x - + static_cast(363.0/1280.0) * SQR(x) + + static_cast(7.0/1280.0) * CUBE(x) + + static_cast(9.0/128.0) * SQR(SQR(x)) + + static_cast(7.0 / 320.0) * CUBE(x) * SQR(x) - + static_cast(3.0 / 80.0) * SQR(CUBE(x)) + + static_cast(1.0 / 80.0) * SQR(SQR(x)) * CUBE(x) - + static_cast(1.0 / 720.0) * SQR(SQR(SQR(x))); + } else if (x < static_cast(2.5)) { + return static_cast(145167.0/286720.0) - + static_cast(1457.0/5120.0) * x + + static_cast(195.0/512.0) * SQR(x) - + static_cast(1127.0/1280.0) * CUBE(x) + + static_cast(207.0/256.0) * SQR(SQR(x)) - + static_cast(119.0 / 320.0) * CUBE(x) * SQR(x) + + static_cast(3.0 / 32.0) * SQR(CUBE(x)) - + static_cast(1.0 / 80.0) * SQR(SQR(x)) * CUBE(x) + + 
static_cast(1.0 / 1440.0) * SQR(SQR(SQR(x))); + } else if (x < static_cast(3.5)) { + return static_cast(146051.0/35840.0) * x - + static_cast(1465.0/256.0) * SQR(x) + + static_cast(5123.0/1280.0) * CUBE(x) - + static_cast(209.0/128.0) * SQR(SQR(x)) + + static_cast(131.0 / 320.0) * CUBE(x) * SQR(x) - + static_cast(1.0 / 16.0) * SQR(CUBE(x)) + + static_cast(3.0 / 560.0) * SQR(SQR(x)) * CUBE(x) - + static_cast(1.0 / 5040.0) * SQR(SQR(SQR(x))) - + static_cast(122729.0/143360.0); + } else if (x < static_cast(4.5)) { + return static_cast(4782969.0/1146880.0) - + static_cast(531441.0/71680.0) * x + + static_cast(59049.0/10240.0) * SQR(x) - + static_cast(6561.0/2560.0) * CUBE(x) + + static_cast(729.0/1024.0) * SQR(SQR(x)) - + static_cast(81.0 / 640.0) * CUBE(x) * SQR(x) + + static_cast(9.0 / 640.0) * SQR(CUBE(x)) - + static_cast(1.0 / 1120.0) * SQR(SQR(x)) * CUBE(x) + + static_cast(1.0 / 40320.0) * SQR(SQR(SQR(x))); + } else { + return ZERO; + } + } + template Inline void order(const int& i, const real_t& di, int& i_min, real_t S[O + 1]) { if constexpr (O == 1u) { @@ -92,43 +258,36 @@ namespace prtl_shape { // S(x) = 1/25 * ( 5/2 - |x|)^4 3/2 ≤ |x| < 5/2 // 0.0 |x| ≥ 5/2 if constexpr (not STAGGERED) { // compute at i positions + if (di < HALF) { i_min = i - 2; - S[0] = static_cast(1.0 / 25.0) * SQR(SQR(HALF - di)); - S[1] = static_cast(5.0 / 8.0) - SQR(ONE + di) + - static_cast(32.0 / 45.0) * CUBE(ONE + di) - - static_cast(98.0 / 675.0) * SQR(SQR(ONE + di)); - S[2] = static_cast(5.0 / 8.0) - SQR(di) + - static_cast(32.0 / 45.0) * CUBE(di) - - static_cast(98.0 / 675.0) * SQR(SQR(di)); - S[3] = static_cast(5.0 / 8.0) - SQR(ONE - di) + - static_cast(32.0 / 45.0) * CUBE(ONE - di) - - static_cast(98.0 / 675.0) * SQR(SQR(ONE - di)); - S[4] = static_cast(1.0 / 25.0) * SQR(SQR(HALF + di)); - S[2] = ONE - S[0] - S[1] - S[3] - S[4]; + + for (int n = 0; n < 5; n++) { + S[n] = S4(Kokkos::fabs(TWO + di - static_cast(n))); + } + + Kokkos::printf("S: %e %e %e %e %e\n", S[0], S[1], S[2], 
S[3], S[4]); + Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4]); } else { i_min = i - 1; - S[0] = static_cast(1.0 / 25.0) * SQR(SQR(static_cast(1.5) - di)); - S[1] = static_cast(5.0 / 8.0) - SQR(di) + - static_cast(32.0 / 45.0) * CUBE(di) - - static_cast(98.0 / 675.0) * SQR(SQR(di)); - S[3] = static_cast(5.0 / 8.0) - SQR(TWO - di) + - static_cast(32.0 / 45.0) * CUBE(TWO - di) - - static_cast(98.0 / 675.0) * SQR(SQR(TWO - di)); - S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di - HALF)); - S[2] = ONE - S[0] - S[1] - S[3] - S[4]; + + for (int n = 0; n < 5; n++) { + S[n] = S4(Kokkos::fabs(ONE + di - static_cast(n))); + } + + Kokkos::printf("S: %e %e %e %e %e\n", S[0], S[1], S[2], S[3], S[4]); + Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4]); } } else { // compute at i + 1/2 positions i_min = i - 2; - S[0] = static_cast(1.0 / 25.0) * SQR(SQR(ONE - di)); - S[1] = static_cast(5.0 / 8.0) - SQR(HALF + di) + - static_cast(32.0 / 45.0) * CUBE(HALF + di) - - static_cast(98.0 / 675.0) * SQR(SQR(HALF + di)); - S[3] = static_cast(5.0 / 8.0) - SQR(TWO - di) + - static_cast(32.0 / 45.0) * CUBE(TWO - di) - - static_cast(98.0 / 675.0) * SQR(SQR(TWO - di)); - S[4] = static_cast(1.0 / 25.0) * SQR(SQR(di)); - S[2] = ONE - S[0] - S[1] - S[3] - S[4]; + + for (int n = 0; n < 5; n++) { + S[i] = S4(Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); + } + + Kokkos::printf("S: %e %e %e %e %e\n", S[0], S[1], S[2], S[3], S[4]); + Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4]); + } // staggered } else if constexpr (O == 5u) { // 3/5 - |x|^2 + 5/6 * |x|^3 - 19/72 * |x|^4 + 13/432 * |x|^5 |x| < 2 @@ -136,61 +295,95 @@ namespace prtl_shape { // 0.0 |x| ≥ 3 if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; - S[0] = static_cast(1.0 / 135.0) * SQR(SQR(ONE + di))*(ONE - di); - S[1] = static_cast(3.0 / 5.0) - SQR(ONE + di) + - static_cast(5.0 / 6.0) * CUBE(ONE + di) - - static_cast(19.0 / 72.0) * SQR(SQR(ONE + di)) + - static_cast(13.0 / 
432.0) * SQR(SQR(ONE + di))*(ONE + di); - S[2] = static_cast(3.0 / 5.0) - SQR(di) + - static_cast(5.0 / 6.0) * CUBE(di) - - static_cast(19.0 / 72.0) * SQR(SQR(di)) + - static_cast(13.0 / 432.0) * SQR(SQR(di)) * di; - S[3] = static_cast(3.0 / 5.0) - SQR(ONE - di) + - static_cast(5.0 / 6.0) * CUBE(ONE - di) - - static_cast(19.0 / 72.0) * SQR(SQR(ONE - di)) + - static_cast(13.0 / 432.0) * SQR(SQR(ONE - di))*(ONE - di); - S[4] = static_cast(3.0 / 5.0) - SQR(TWO - di) + - static_cast(5.0 / 6.0) * CUBE(TWO - di) - - static_cast(19.0 / 72.0) * SQR(SQR(TWO - di)) + - static_cast(13.0 / 432.0) * SQR(SQR(TWO - di))*(TWO - di); - S[5] = static_cast(1.0 / 135.0) * SQR(SQR(di))*di; + + for (int n = 0; n < 6; n++) { + S[n] = S5(Kokkos::fabs(TWO + di - static_cast(n))); + } + + Kokkos::printf("S: %e %e %e %e %e %e\n", S[0], S[1], S[2], S[3], S[4], S[5]); + Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4] + S[5]); } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 3; - S[0] = static_cast(1.0 / 135.0) * SQR(CUBE(HALF - di)); - S[1] = static_cast(3.0 / 5.0) - SQR(static_cast(1.5) + di) + - static_cast(5.0 / 6.0) * CUBE(static_cast(1.5) + di) - - static_cast(19.0 / 72.0) * SQR(SQR(static_cast(1.5) + di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(1.5) + di)); - S[2] = static_cast(3.0 / 5.0) - SQR(HALF + di) + - static_cast(5.0 / 6.0) * CUBE(HALF + di) - - static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); - S[4] = static_cast(3.0 / 5.0) - SQR(static_cast(1.5) - di) + - static_cast(5.0 / 6.0) * CUBE(static_cast(1.5) - di) - - static_cast(19.0 / 72.0) * SQR(SQR(static_cast(1.5) - di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(1.5) - di)); - S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(HALF + di)); - S[3] = ONE - S[0] - S[1] - S[2] - S[4] - S[5]; + + for (int n = 0; n < 6; n++) { + S[n] = S5(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); + } + } else { + i_min = i - 2; + for (int 
n = 0; n < 6; n++) { + S[n] = S5(Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); + } + } + } // staggered + } else if constexpr (O == 6u) { + if constexpr (not STAGGERED) { // compute at i positions + + if (di < HALF) { + i_min = i - 3; + + for (int n = 0; n < 7; n++) { + S[n] = S6(Kokkos::fabs(THREE + di - static_cast(n))); + } } else { i_min = i - 2; - S[0] = static_cast(1.0 / 135.0) * SQR(CUBE(static_cast(1.5) - di)); - S[1] = static_cast(3.0 / 5.0) - SQR(HALF + di) + - static_cast(5.0 / 6.0) * CUBE(HALF + di) - - static_cast(19.0 / 72.0) * SQR(SQR(HALF + di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(HALF + di)); - S[2] = static_cast(3.0 / 5.0) - SQR(di - HALF) + - static_cast(5.0 / 6.0) * CUBE(di - HALF) - - static_cast(19.0 / 72.0) * SQR(SQR(di - HALF)) + - static_cast(13.0 / 432.0) * SQR(CUBE(di - HALF)); - S[4] = static_cast(3.0 / 5.0) - SQR(static_cast(2.5) - di) + - static_cast(5.0 / 6.0) * CUBE(static_cast(2.5) - di) - - static_cast(19.0 / 72.0) * SQR(SQR(static_cast(2.5) - di)) + - static_cast(13.0 / 432.0) * SQR(CUBE(static_cast(2.5) - di)); - S[5] = static_cast(1.0 / 135.0) * SQR(CUBE(di - HALF)); - S[3] = ONE - S[0] - S[1] - S[2] - S[4] - S[5]; + + for (int n = 0; n < 5; n++) { + S[i] = S6(Kokkos::fabs(TWO + di - static_cast(n))); + } } + } else { // compute at i + 1/2 positions + i_min = i - 3; + + for (int n = 0; n < 5; n++) { + S[n] = S6(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); + } } // staggered + } else if constexpr (O == 7u) { + if constexpr (not STAGGERED) { // compute at i positions + i_min = i - 3; + + for (int n = 0; n < 8; n++) { + S[n] = S7(Kokkos::fabs(THREE + di - static_cast(n))); + } + } else { // compute at i + 1/2 positions + if (di < HALF) { + i_min = i - 4; + + for (int n = 0; n < 8; n++) { + S[n] = S7(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); + } + } else { + i_min = i - 2; + for (int n = 0; n < 8; n++) { + S[n] = S7(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); + } + } + } // staggered + 
} else if constexpr (O == 8u) { + if constexpr (not STAGGERED) { // compute at i positions + if (di < HALF) { + i_min = i - 4; + + for (int n = 0; n < 9; n++) { + S[n] = S8(Kokkos::fabs(FOUR + di - static_cast(n))); + } + } else { + i_min = i - 3; + + for (int n = 0; n < 9; n++) { + S[n] = S8(Kokkos::fabs(THREE + di - static_cast(n))); + } + } + } else { // compute at i + 1/2 positions + i_min = i - 4; + + for (int n = 0; n < 9; n++) { + S[n] = S8(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); + } + } // staggered + } else { + //ERROR("Interpolation order not implemented"); } } From 13a85226e3717dbd2c618b056df262276e8e7d5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 10 Sep 2025 23:08:12 -0500 Subject: [PATCH 066/154] bugfix, loop unroll pragma and error handling fix --- src/kernels/particle_shapes.hpp | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 2ebf87ac0..8b6e6111d 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -262,6 +262,7 @@ namespace prtl_shape { if (di < HALF) { i_min = i - 2; + #pragma unroll for (int n = 0; n < 5; n++) { S[n] = S4(Kokkos::fabs(TWO + di - static_cast(n))); } @@ -271,6 +272,7 @@ namespace prtl_shape { } else { i_min = i - 1; + #pragma unroll for (int n = 0; n < 5; n++) { S[n] = S4(Kokkos::fabs(ONE + di - static_cast(n))); } @@ -281,6 +283,7 @@ namespace prtl_shape { } else { // compute at i + 1/2 positions i_min = i - 2; + #pragma unroll for (int n = 0; n < 5; n++) { S[i] = S4(Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); } @@ -296,6 +299,7 @@ namespace prtl_shape { if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; + #pragma unroll for (int n = 0; n < 6; n++) { S[n] = S5(Kokkos::fabs(TWO + di - static_cast(n))); } @@ -306,11 +310,14 @@ namespace prtl_shape { if (di < HALF) { i_min = i - 3; + #pragma unroll for (int n = 0; n 
< 6; n++) { S[n] = S5(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); } } else { i_min = i - 2; + + #pragma unroll for (int n = 0; n < 6; n++) { S[n] = S5(Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); } @@ -322,12 +329,14 @@ namespace prtl_shape { if (di < HALF) { i_min = i - 3; + #pragma unroll for (int n = 0; n < 7; n++) { S[n] = S6(Kokkos::fabs(THREE + di - static_cast(n))); } } else { i_min = i - 2; + #pragma unroll for (int n = 0; n < 5; n++) { S[i] = S6(Kokkos::fabs(TWO + di - static_cast(n))); } @@ -335,6 +344,7 @@ namespace prtl_shape { } else { // compute at i + 1/2 positions i_min = i - 3; + #pragma unroll for (int n = 0; n < 5; n++) { S[n] = S6(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); } @@ -343,6 +353,7 @@ namespace prtl_shape { if constexpr (not STAGGERED) { // compute at i positions i_min = i - 3; + #pragma unroll for (int n = 0; n < 8; n++) { S[n] = S7(Kokkos::fabs(THREE + di - static_cast(n))); } @@ -354,7 +365,9 @@ namespace prtl_shape { S[n] = S7(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); } } else { - i_min = i - 2; + i_min = i - 3; + + #pragma unroll for (int n = 0; n < 8; n++) { S[n] = S7(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); } @@ -365,12 +378,14 @@ namespace prtl_shape { if (di < HALF) { i_min = i - 4; + #pragma unroll for (int n = 0; n < 9; n++) { S[n] = S8(Kokkos::fabs(FOUR + di - static_cast(n))); } } else { i_min = i - 3; + #pragma unroll for (int n = 0; n < 9; n++) { S[n] = S8(Kokkos::fabs(THREE + di - static_cast(n))); } @@ -378,12 +393,13 @@ namespace prtl_shape { } else { // compute at i + 1/2 positions i_min = i - 4; + #pragma unroll for (int n = 0; n < 9; n++) { S[n] = S8(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); } } // staggered } else { - //ERROR("Interpolation order not implemented"); + raise::KernelError(HERE, "Unsupported interpolation order"); } } From af001b7f00058a03047cf813df03b59e309ebf68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: 
Wed, 10 Sep 2025 23:15:01 -0500 Subject: [PATCH 067/154] remove print --- src/kernels/particle_shapes.hpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 8b6e6111d..0ad3d0fda 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -266,9 +266,6 @@ namespace prtl_shape { for (int n = 0; n < 5; n++) { S[n] = S4(Kokkos::fabs(TWO + di - static_cast(n))); } - - Kokkos::printf("S: %e %e %e %e %e\n", S[0], S[1], S[2], S[3], S[4]); - Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4]); } else { i_min = i - 1; @@ -276,9 +273,6 @@ namespace prtl_shape { for (int n = 0; n < 5; n++) { S[n] = S4(Kokkos::fabs(ONE + di - static_cast(n))); } - - Kokkos::printf("S: %e %e %e %e %e\n", S[0], S[1], S[2], S[3], S[4]); - Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4]); } } else { // compute at i + 1/2 positions i_min = i - 2; @@ -287,10 +281,6 @@ namespace prtl_shape { for (int n = 0; n < 5; n++) { S[i] = S4(Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); } - - Kokkos::printf("S: %e %e %e %e %e\n", S[0], S[1], S[2], S[3], S[4]); - Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4]); - } // staggered } else if constexpr (O == 5u) { // 3/5 - |x|^2 + 5/6 * |x|^3 - 19/72 * |x|^4 + 13/432 * |x|^5 |x| < 2 @@ -303,9 +293,6 @@ namespace prtl_shape { for (int n = 0; n < 6; n++) { S[n] = S5(Kokkos::fabs(TWO + di - static_cast(n))); } - - Kokkos::printf("S: %e %e %e %e %e %e\n", S[0], S[1], S[2], S[3], S[4], S[5]); - Kokkos::printf("Sum: %e\n", S[0] + S[1] + S[2] + S[3] + S[4] + S[5]); } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 3; From 9b3bcd5d4384ee14a104a5b722a81d6557edd35f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Wed, 10 Sep 2025 23:28:30 -0500 Subject: [PATCH 068/154] bugfixes --- src/kernels/particle_shapes.hpp | 39 +++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 
17 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 0ad3d0fda..46300cf05 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -17,7 +17,7 @@ namespace prtl_shape { - Inline real_t S4(real_t x) + Inline real_t S4(const real_t x) { if (x < HALF) { @@ -38,7 +38,7 @@ namespace prtl_shape { } } - Inline real_t S5(real_t x) + Inline real_t S5(const real_t x) { if (x <= ONE) { @@ -59,16 +59,21 @@ namespace prtl_shape { } } - Inline real_t S6(real_t x) + Inline real_t S6(const real_t x) { if (x <= HALF) { - return static_cast(5887.0 / 11520.0) - static_cast(77.0 / 192.0) * SQR(x) + - static_cast(7.0 / 48.0) * SQR(SQR(x)) - static_cast(1.0 / 36.0) * SQR(SQR(x)) * SQR(x); + return static_cast(5887.0 / 11520.0) - + static_cast(77.0 / 192.0) * SQR(x) + + static_cast(7.0 / 48.0) * SQR(SQR(x)) - + static_cast(1.0 / 36.0) * SQR(CUBE(x)); } else if (x < static_cast(1.5)) { - return static_cast(7861.0/15360.0) - static_cast(7.0/768.0) * x - - static_cast(91.0/256.0) * SQR(x) - static_cast(35.0/288.0) * CUBE(x) + - static_cast(21.0/64.0) * SQR(SQR(x)) - static_cast(7.0 / 48.0) * CUBE(x) * SQR(x) + - static_cast(1.0 / 48.0) * SQR(SQR(x)) * SQR(x); + return static_cast(7861.0/15360.0) - + static_cast(7.0/768.0) * x - + static_cast(91.0/256.0) * SQR(x) - + static_cast(35.0/288.0) * CUBE(x) + + static_cast(21.0/64.0) * SQR(SQR(x)) - + static_cast(7.0 / 48.0) * CUBE(x) * SQR(x) + + static_cast(1.0 / 48.0) * SQR(CUBE(x)); } else if (x < static_cast(2.5)) { return static_cast(1379.0/7680.0) + static_cast(1267.0/960.0) * x - @@ -76,7 +81,7 @@ namespace prtl_shape { static_cast(133.0/72.0) * CUBE(x) - static_cast(21.0/32.0) * SQR(SQR(x)) + static_cast(7.0 / 60.0) * CUBE(x) * SQR(x) - - static_cast(1.0 / 120.0) * SQR(SQR(x)) * SQR(x); + static_cast(1.0 / 120.0) * SQR(CUBE(x)); } else if (x < static_cast(3.5)) { return static_cast(117649.0/46080.0) - static_cast(16807.0/3840.0) * x + @@ -84,13 +89,13 @@ namespace 
prtl_shape { static_cast(343.0/288.0) * CUBE(x) + static_cast(49.0/192.0) * SQR(SQR(x)) - static_cast(7.0 / 240.0) * CUBE(x) * SQR(x) + - static_cast(1.0 / 720.0) * SQR(SQR(x)) * SQR(x); + static_cast(1.0 / 720.0) * SQR(CUBE(x)); } else { return ZERO; } } - Inline real_t S7(real_t x) + Inline real_t S7(const real_t x) { if (x < ONE) { return static_cast(151.0) / static_cast(315.0) - @@ -106,7 +111,7 @@ namespace prtl_shape { HALF * SQR(SQR(x)) - static_cast(7.0) / static_cast(30.0) * CUBE(x) * SQR(x) + static_cast(1.0) / static_cast(20.0) * SQR(SQR(x)) * SQR(x) - - static_cast(1.0) / static_cast(270.0) * SQR(SQR(x)) * CUBE(x); + static_cast(1.0) / static_cast(240.0) * SQR(SQR(x)) * CUBE(x); } else if (x < THREE) { return static_cast(217.0)/static_cast(90.0) * x - static_cast(23.0)/static_cast(6.0) * SQR(x) + @@ -130,7 +135,7 @@ namespace prtl_shape { } } - Inline real_t S8(real_t x) + Inline real_t S8(const real_t x) { if (x < HALF) { return static_cast(259723.0 / 573440.0) - @@ -324,15 +329,15 @@ namespace prtl_shape { i_min = i - 2; #pragma unroll - for (int n = 0; n < 5; n++) { - S[i] = S6(Kokkos::fabs(TWO + di - static_cast(n))); + for (int n = 0; n < 7; n++) { + S[n] = S6(Kokkos::fabs(TWO + di - static_cast(n))); } } } else { // compute at i + 1/2 positions i_min = i - 3; #pragma unroll - for (int n = 0; n < 5; n++) { + for (int n = 0; n < 7; n++) { S[n] = S6(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); } } // staggered From 7741cc59379cbc48ed470986842b5ebbe04f443f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 11 Sep 2025 01:40:31 -0500 Subject: [PATCH 069/154] 9th order shape function --- src/engines/srpic.hpp | 2 + src/kernels/currents_deposit.hpp | 2 +- src/kernels/particle_shapes.hpp | 136 +++++++++++++++++++++++++------ 3 files changed, 112 insertions(+), 28 deletions(-) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 9fc4b7fc4..c0ba54c31 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp 
@@ -561,6 +561,8 @@ namespace ntt { deposit_with<7u>(species, domain.mesh.metric, scatter_cur, dt); } else if (shape_order == 8) { deposit_with<8u>(species, domain.mesh.metric, scatter_cur, dt); + } else if (shape_order == 9) { + deposit_with<9u>(species, domain.mesh.metric, scatter_cur, dt); } else { raise::Error("Invalid shape order for current deposition", HERE); } diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index c0991650a..f8c8607a3 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -401,7 +401,7 @@ namespace kernel { cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; } } - } else if constexpr ((O >= 1u) and (O <= 8u)) { + } else if constexpr ((O >= 1u) and (O <= 9u)) { // shape function in dim1 -> always required real_t iS_x1[O + 2], fS_x1[O + 2]; diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 46300cf05..12f126a92 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -98,38 +98,38 @@ namespace prtl_shape { Inline real_t S7(const real_t x) { if (x < ONE) { - return static_cast(151.0) / static_cast(315.0) - + return static_cast(151.0/315.0) - THIRD * SQR(x) + - static_cast(1.0) / static_cast(9.0) * SQR(SQR(x)) - - static_cast(1.0) / static_cast(36.0) * SQR(SQR(x)) * SQR(x) + - static_cast(1.0) / static_cast(144.0) * SQR(SQR(x)) * CUBE(x); + static_cast(1.0/9.0) * SQR(SQR(x)) - + static_cast(1.0/36.0) * SQR(SQR(x)) * SQR(x) + + static_cast(1.0/144.0) * SQR(SQR(x)) * CUBE(x); } else if (x <= TWO) { - return static_cast(103.0)/static_cast(210.0) - - static_cast(7.0)/static_cast(90.0) * x - - static_cast(1.0)/static_cast(10.0) * SQR(x) - - static_cast(7.0)/static_cast(18.0) * CUBE(x) + + return static_cast(103.0/210.0) - + static_cast(7.0/90.0) * x - + static_cast(1.0/10.0) * SQR(x) - + static_cast(7.0/18.0) * CUBE(x) + HALF * SQR(SQR(x)) - - static_cast(7.0) / static_cast(30.0) * CUBE(x) * SQR(x) + - static_cast(1.0) / static_cast(20.0) 
* SQR(SQR(x)) * SQR(x) - - static_cast(1.0) / static_cast(240.0) * SQR(SQR(x)) * CUBE(x); + static_cast(7.0/30.0) * CUBE(x) * SQR(x) + + static_cast(1.0/20.0) * SQR(SQR(x)) * SQR(x) - + static_cast(1.0/240.0) * SQR(SQR(x)) * CUBE(x); } else if (x < THREE) { - return static_cast(217.0)/static_cast(90.0) * x - - static_cast(23.0)/static_cast(6.0) * SQR(x) + - static_cast(49.0)/static_cast(18.0) * CUBE(x) - - static_cast(19.0)/static_cast(18.0) * SQR(SQR(x)) + - static_cast(7.0)/static_cast(30.0) * CUBE(x) * SQR(x) - - static_cast(1.0)/static_cast(36.0) * SQR(SQR(x)) * SQR(x) + - static_cast(1.0)/static_cast(720.0) * SQR(SQR(x)) * CUBE(x) - - static_cast(139.0)/static_cast(630.0); + return static_cast(217.0/90.0) * x - + static_cast(23.0/6.0) * SQR(x) + + static_cast(49.0/18.0) * CUBE(x) - + static_cast(19.0/18.0) * SQR(SQR(x)) + + static_cast(7.0/30.0) * CUBE(x) * SQR(x) - + static_cast(1.0/36.0) * SQR(SQR(x)) * SQR(x) + + static_cast(1.0/720.0) * SQR(SQR(x)) * CUBE(x) - + static_cast(139.0/630.0); } else if (x < FOUR) { - return static_cast(1024.0)/static_cast(315.0) - - static_cast(256.0)/static_cast(45.0) * x + - static_cast(64.0)/static_cast(15.0) * SQR(x) - - static_cast(16.0)/static_cast(9.0) * CUBE(x) + - static_cast(4.0)/static_cast(9.0) * SQR(SQR(x)) - - static_cast(1.0)/static_cast(15.0) * CUBE(x) * SQR(x) + - static_cast(1.0)/static_cast(180.0) * SQR(SQR(x)) * SQR(x) - - static_cast(1.0)/static_cast(5040.0) * SQR(SQR(x)) * CUBE(x); + return static_cast(1024.0/315.0) - + static_cast(256.0/45.0) * x + + static_cast(64.0/15.0) * SQR(x) - + static_cast(16.0/9.0) * CUBE(x) + + static_cast(4.0/9.0) * SQR(SQR(x)) - + static_cast(1.0/15.0) * CUBE(x) * SQR(x) + + static_cast(1.0/180.0) * SQR(SQR(x)) * SQR(x) - + static_cast(1.0/5040.0) * SQR(SQR(x)) * CUBE(x); } else { return ZERO; } @@ -188,6 +188,64 @@ namespace prtl_shape { } } + Inline real_t S9(const real_t x) + { + if (x <= ONE) { + return static_cast(15619.0 / 36288.0) - + static_cast(35.0 / 144.0) * SQR(x) 
+ + static_cast(19.0 / 288.0) * SQR(SQR(x)) - + static_cast(5.0 / 432.0) * SQR(CUBE(x)) + + static_cast(1.0 / 576.0) * SQR(SQR(SQR(x))) - + static_cast(1.0 / 2880.0) * SQR(SQR(SQR(x))) * x; + } else if (x < TWO) { + return static_cast(7799.0/18144.0) + + static_cast(1.0/192.0) * x - + static_cast(19.0/72.0) * SQR(x) + + static_cast(7.0/144.0) * CUBE(x) - + static_cast(1.0/144.0) * SQR(SQR(x)) + + static_cast(7.0 / 96.0) * CUBE(x) * SQR(x) - + static_cast(13.0 / 216.0) * SQR(CUBE(x)) + + static_cast(1.0 / 48.0) * SQR(SQR(x)) * CUBE(x) - + static_cast(1.0 / 288.0) * SQR(SQR(SQR(x))) + + static_cast(1.0 / 4320.0) * CUBE(CUBE(x)); + } else if (x <= THREE) { + return static_cast(1553.0/2592.0) - + static_cast(339.0/448.0) * x + + static_cast(635.0/504.0) * SQR(x) - + static_cast(83.0/48.0) * CUBE(x) + + static_cast(191.0/144.0) * SQR(SQR(x)) - + static_cast(19.0 / 32.0) * CUBE(x) * SQR(x) + + static_cast(35.0 / 216.0) * SQR(CUBE(x)) - + static_cast(3.0 / 112.0) * SQR(SQR(x)) * CUBE(x) + + static_cast(5.0 / 2016.0) * SQR(SQR(SQR(x))) - + static_cast(1.0 / 10080.0) * CUBE(CUBE(x)); + } else if (x < FOUR) { + return static_cast(5883.0/896.0) * x - + static_cast(2449.0/288.0) * SQR(x) + + static_cast(563.0/96.0) * CUBE(x) - + static_cast(1423.0/576.0) * SQR(SQR(x)) + + static_cast(43.0/64.0) * CUBE(x) * SQR(x) - + static_cast(103.0/864.0) * SQR(CUBE(x)) + + static_cast(3.0 / 224.0) * SQR(SQR(x)) * CUBE(x) - + static_cast(1.0 / 1152.0) * SQR(SQR(SQR(x))) + + static_cast(1.0 / 40320.0) * CUBE(CUBE(x)) - + static_cast(133663.0/72576.0); + } else if (x < FIVE) { + return static_cast(390625.0/72576.0) - + static_cast(78125.0/8064.0) * x + + static_cast(15625.0/2016.0) * SQR(x) - + static_cast(3125.0/864.0) * CUBE(x) + + static_cast(625.0/576.0) * SQR(SQR(x)) - + static_cast(125.0 / 576.0) * CUBE(x) * SQR(x) + + static_cast(25.0 / 864.0) * SQR(CUBE(x)) - + static_cast(5.0 / 2016.0) * SQR(SQR(x)) * CUBE(x) + + static_cast(1.0 / 8064.0) * SQR(SQR(SQR(x))) - + static_cast(1.0 / 
362880.0) * CUBE(CUBE(x)); + } else { + return ZERO; + } + } + template Inline void order(const int& i, const real_t& di, int& i_min, real_t S[O + 1]) { if constexpr (O == 1u) { @@ -390,6 +448,30 @@ namespace prtl_shape { S[n] = S8(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); } } // staggered + } else if constexpr (O == 9u) { + if constexpr (not STAGGERED) { // compute at i positions + i_min = i - 4; + + #pragma unroll + for (int n = 0; n < 10; n++) { + S[n] = S9(Kokkos::fabs(FOUR + di - static_cast(n))); + } + } else { // compute at i + 1/2 positions + if (di < HALF) { + i_min = i - 5; + + for (int n = 0; n < 10; n++) { + S[n] = S9(Kokkos::fabs(static_cast(4.5) + di - static_cast(n))); + } + } else { + i_min = i - 4; + + #pragma unroll + for (int n = 0; n < 10; n++) { + S[n] = S9(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); + } + } + } // staggered } else { raise::KernelError(HERE, "Unsupported interpolation order"); } From e0d39ee0a579a92f845707bd3f37ae7095e7a0a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 11 Sep 2025 10:55:49 -0700 Subject: [PATCH 070/154] applied formatting, improved comments and error handling --- src/kernels/currents_deposit.hpp | 18 +- src/kernels/particle_shapes.hpp | 494 +++++++++++++++++++------------ 2 files changed, 316 insertions(+), 196 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index f8c8607a3..1299be69d 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -449,12 +449,12 @@ namespace kernel { #pragma unroll for (int j = 0; j < O + 2; ++j) { // Esirkepov 2001, Eq. 
38 (simplified) - Wx1[i][j] = HALF * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] + iS_x2[j]); + Wx1[i][j] = HALF * (fS_x1[i] - iS_x1[i]) * (fS_x2[j] + iS_x2[j]); - Wx2[i][j] = HALF * (fS_x1[i] + iS_x1[i]) * (fS_x2[j] - iS_x2[j]); + Wx2[i][j] = HALF * (fS_x1[i] + iS_x1[i]) * (fS_x2[j] - iS_x2[j]); - Wx3[i][j] = THIRD * (fS_x2[j] * (HALF * iS_x1[i] + fS_x1[i]) + - iS_x2[j] * (HALF * fS_x1[i] + iS_x1[i])); + Wx3[i][j] = THIRD * (fS_x2[j] * (HALF * iS_x1[i] + fS_x1[i]) + + iS_x2[j] * (HALF * fS_x1[i] + iS_x1[i])); } } @@ -701,12 +701,12 @@ namespace kernel { } } - } else { // order - raise::KernelError(HERE, "Unsupported interpolation order"); - } + } else { // order + raise::KernelError(HERE, "Unsupported interpolation order. O > 9 not supported. Seriously. What are you even doing here?"); } - }; - } // namespace kernel + } + }; +} // namespace kernel #undef i_di_to_Xi diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 12f126a92..0109ba949 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -17,77 +17,101 @@ namespace prtl_shape { - Inline real_t S4(const real_t x) - { - if (x < HALF) - { - return static_cast(115.0 / 192.0) - static_cast(5.0 / 8.0) * SQR(x) + - INV_4 * SQR(SQR(x)); - } else if (x < static_cast(1.5)) - { - return static_cast(55.0/96.0) + static_cast(5.0/24.0) * x - - static_cast(5.0 / 4.0) * SQR(x) + static_cast(5.0 / 6.0) * CUBE(x) - + // clang-format off + // 115/192 - (5/8) * |x|^2 + (1/4) * |x|^4 |x| < 1/2 + // S(x) = 55/96 + (5/24) * |x| - (5/4) * |x|^2 + (5/6) * |x|^3 - (1/6) * |x|^4 1/2 ≤ |x| < 3/2 + // 625/384 - (125/48) * |x| + (25/16) * |x|^2 - (5/12) * |x|^3 + (1/24) * |x|^4 3/2 ≤ |x| < 5/2 + // 0.0 |x| ≥ 5/2 + // clang-format on + Inline real_t S4(const real_t x) { + if (x < HALF) { + return static_cast(115.0 / 192.0) - + static_cast(5.0 / 8.0) * SQR(x) + INV_4 * SQR(SQR(x)); + } else if (x < static_cast(1.5)) { + return static_cast(55.0 / 96.0) + + static_cast(5.0 / 24.0) * x - + 
static_cast(5.0 / 4.0) * SQR(x) + + static_cast(5.0 / 6.0) * CUBE(x) - static_cast(1.0 / 6.0) * SQR(SQR(x)); - } else if (x < static_cast(2.5)) - { - return static_cast(625.0/384.0) - static_cast(125.0/48.0) * x + - static_cast(25.0 / 16.0) * SQR(x) - static_cast(5.0 / 12.0) * CUBE(x) + + } else if (x < static_cast(2.5)) { + return static_cast(625.0 / 384.0) - + static_cast(125.0 / 48.0) * x + + static_cast(25.0 / 16.0) * SQR(x) - + static_cast(5.0 / 12.0) * CUBE(x) + static_cast(1.0 / 24.0) * SQR(SQR(x)); } else { return ZERO; } } - Inline real_t S5(const real_t x) - { - if (x <= ONE) - { + // clang-format off + // S5(x) = + // 11/20 - (1/2) * |x|^2 + (1/4) * |x|^4 - (1/12) * |x|^5 if |x| ≤ 1 + // 17/40 + (5/8) * |x| - (7/4) * |x|^2 + (5/4) * |x|^3 - (3/8) * |x|^4 + (1/24) * |x|^5 if 1 < |x| < 2 + // 81/40 - (27/8) * |x| + (9/4) * |x|^2 - (3/4) * |x|^3 + (1/8) * |x|^4 - (1/120) * |x|^5 if 2 ≤ |x| < 3 + // 0.0 if |x| > 3 + // clang-format on + Inline real_t S5(const real_t x) { + if (x <= ONE) { return static_cast(11.0 / 20.0) - HALF * SQR(x) + - INV_4 * SQR(SQR(x)) - static_cast(1.0 / 12.0) * CUBE(x) * SQR(x); - } else if (x < TWO) - { - return static_cast(17.0/40.0) + FIVE * INV_8 * x - - static_cast(7.0) * INV_4 * SQR(x) + FIVE * INV_4 * CUBE(x) - - THREE * INV_8 * SQR(SQR(x)) + static_cast(1.0 / 24.0) * CUBE(x) * SQR(x); - } else if (x < THREE) - { - return static_cast(81.0/40.0) - static_cast(27.0/8.0) * x + - static_cast(9.0) * INV_4 * SQR(x) - THREE_FOURTHS * CUBE(x) + - INV_8 * SQR(SQR(x)) - static_cast(1.0 / 120.0) * CUBE(x) * SQR(x); + INV_4 * SQR(SQR(x)) - + static_cast(1.0 / 12.0) * CUBE(x) * SQR(x); + } else if (x < TWO) { + return static_cast(17.0 / 40.0) + static_cast(5.0 / 8.0) * x - + static_cast(7.0 / 4.0) * SQR(x) + + static_cast(5.0 / 4.0) * CUBE(x) - + static_cast(3.0 / 8.0) * SQR(SQR(x)) + + static_cast(1.0 / 24.0) * CUBE(x) * SQR(x); + } else if (x < THREE) { + return static_cast(81.0 / 40.0) - + static_cast(27.0 / 8.0) * x + + 
static_cast(9.0 / 4.0) * SQR(x) - THREE_FOURTHS * CUBE(x) + + INV_8 * SQR(SQR(x)) - + static_cast(1.0 / 120.0) * CUBE(x) * SQR(x); } else { return ZERO; } } - Inline real_t S6(const real_t x) - { + // clang-format off + // S6(x) = + // 5887/11520 - (77/192) * |x|^2 + (7/48) * |x|^4 - (1/36) * |x|^6 if |x| ≤ 1/2 + // 7861/15360 - (7/768) * |x| - (91/256) * |x|^2 - (35/288) * |x|^3 + (21/64) * |x|^4 + // - (7/48) * |x|^5 + (1/48) * |x|^6 if 1/2 < |x| < 3/2 + // 1379/7680 + (1267/960) * |x| - (329/128) * |x|^2 + (133/72) * |x|^3 + // - (21/32) * |x|^4 + (7/60) * |x|^5 - (1/120) * |x|^6 if 3/2 ≤ |x| < 5/2 + // 117649/46080 - (16807/3840) * |x| + (2401/768) * |x|^2 - (343/288) * |x|^3 + // + (49/192) * |x|^4 - (7/240) * |x|^5 + (1/720) * |x|^6 if 5/2 ≤ |x| < 7/2 + // 0.0 if |x| ≥ 7/2 + // clang-format on + Inline real_t S6(const real_t x) { if (x <= HALF) { - return static_cast(5887.0 / 11520.0) - + return static_cast(5887.0 / 11520.0) - static_cast(77.0 / 192.0) * SQR(x) + - static_cast(7.0 / 48.0) * SQR(SQR(x)) - + static_cast(7.0 / 48.0) * SQR(SQR(x)) - static_cast(1.0 / 36.0) * SQR(CUBE(x)); } else if (x < static_cast(1.5)) { - return static_cast(7861.0/15360.0) - - static_cast(7.0/768.0) * x - - static_cast(91.0/256.0) * SQR(x) - - static_cast(35.0/288.0) * CUBE(x) + - static_cast(21.0/64.0) * SQR(SQR(x)) - + return static_cast(7861.0 / 15360.0) - + static_cast(7.0 / 768.0) * x - + static_cast(91.0 / 256.0) * SQR(x) - + static_cast(35.0 / 288.0) * CUBE(x) + + static_cast(21.0 / 64.0) * SQR(SQR(x)) - static_cast(7.0 / 48.0) * CUBE(x) * SQR(x) + static_cast(1.0 / 48.0) * SQR(CUBE(x)); } else if (x < static_cast(2.5)) { - return static_cast(1379.0/7680.0) + - static_cast(1267.0/960.0) * x - - static_cast(329.0/128.0) * SQR(x) + - static_cast(133.0/72.0) * CUBE(x) - - static_cast(21.0/32.0) * SQR(SQR(x)) + + return static_cast(1379.0 / 7680.0) + + static_cast(1267.0 / 960.0) * x - + static_cast(329.0 / 128.0) * SQR(x) + + static_cast(133.0 / 72.0) * CUBE(x) - + 
static_cast(21.0 / 32.0) * SQR(SQR(x)) + static_cast(7.0 / 60.0) * CUBE(x) * SQR(x) - static_cast(1.0 / 120.0) * SQR(CUBE(x)); } else if (x < static_cast(3.5)) { - return static_cast(117649.0/46080.0) - - static_cast(16807.0/3840.0) * x + - static_cast(2401.0/768.0) * SQR(x) - - static_cast(343.0/288.0) * CUBE(x) + - static_cast(49.0/192.0) * SQR(SQR(x)) - + return static_cast(117649.0 / 46080.0) - + static_cast(16807.0 / 3840.0) * x + + static_cast(2401.0 / 768.0) * SQR(x) - + static_cast(343.0 / 288.0) * CUBE(x) + + static_cast(49.0 / 192.0) * SQR(SQR(x)) - static_cast(7.0 / 240.0) * CUBE(x) * SQR(x) + static_cast(1.0 / 720.0) * SQR(CUBE(x)); } else { @@ -95,90 +119,110 @@ namespace prtl_shape { } } - Inline real_t S7(const real_t x) - { + // clang-format off + // S7(x) = + // 151/315 - (1/3) * |x|^2 + (1/9) * |x|^4 - (1/36) * |x|^6 + (1/144) * |x|^7 if |x| < 1 + // 103/210 - (7/90) * |x| - (1/10) * |x|^2 - (7/18) * |x|^3 + (1/2) * |x|^4 + // - (7/30) * |x|^5 + (1/20) * |x|^6 - (1/240) * |x|^7 if 1 ≤ |x| ≤ 2 + // (217/90) * |x| - (23/6) * |x|^2 + (49/18) * |x|^3 - (19/18) * |x|^4 + // + (7/30) * |x|^5 - (1/36) * |x|^6 + (1/720) * |x|^7 - (139/630) if 2 < |x| < 3 + // 1024/315 - (256/45) * |x| + (64/15) * |x|^2 - (16/9) * |x|^3 + (4/9) * |x|^4 + // - (1/15) * |x|^5 + (1/180) * |x|^6 - (1/5040) * |x|^7 if 3 ≤ |x| < 4 + // 0.0 if |x| ≥ 4 + // clang-format on + Inline real_t S7(const real_t x) { if (x < ONE) { - return static_cast(151.0/315.0) - - THIRD * SQR(x) + - static_cast(1.0/9.0) * SQR(SQR(x)) - - static_cast(1.0/36.0) * SQR(SQR(x)) * SQR(x) + - static_cast(1.0/144.0) * SQR(SQR(x)) * CUBE(x); + return static_cast(151.0 / 315.0) - THIRD * SQR(x) + + static_cast(1.0 / 9.0) * SQR(SQR(x)) - + static_cast(1.0 / 36.0) * SQR(SQR(x)) * SQR(x) + + static_cast(1.0 / 144.0) * SQR(SQR(x)) * CUBE(x); } else if (x <= TWO) { - return static_cast(103.0/210.0) - - static_cast(7.0/90.0) * x - - static_cast(1.0/10.0) * SQR(x) - - static_cast(7.0/18.0) * CUBE(x) + - HALF * 
SQR(SQR(x)) - - static_cast(7.0/30.0) * CUBE(x) * SQR(x) + - static_cast(1.0/20.0) * SQR(SQR(x)) * SQR(x) - - static_cast(1.0/240.0) * SQR(SQR(x)) * CUBE(x); + return static_cast(103.0 / 210.0) - + static_cast(7.0 / 90.0) * x - + static_cast(1.0 / 10.0) * SQR(x) - + static_cast(7.0 / 18.0) * CUBE(x) + HALF * SQR(SQR(x)) - + static_cast(7.0 / 30.0) * CUBE(x) * SQR(x) + + static_cast(1.0 / 20.0) * SQR(SQR(x)) * SQR(x) - + static_cast(1.0 / 240.0) * SQR(SQR(x)) * CUBE(x); } else if (x < THREE) { - return static_cast(217.0/90.0) * x - - static_cast(23.0/6.0) * SQR(x) + - static_cast(49.0/18.0) * CUBE(x) - - static_cast(19.0/18.0) * SQR(SQR(x)) + - static_cast(7.0/30.0) * CUBE(x) * SQR(x) - - static_cast(1.0/36.0) * SQR(SQR(x)) * SQR(x) + - static_cast(1.0/720.0) * SQR(SQR(x)) * CUBE(x) - - static_cast(139.0/630.0); + return static_cast(217.0 / 90.0) * x - + static_cast(23.0 / 6.0) * SQR(x) + + static_cast(49.0 / 18.0) * CUBE(x) - + static_cast(19.0 / 18.0) * SQR(SQR(x)) + + static_cast(7.0 / 30.0) * CUBE(x) * SQR(x) - + static_cast(1.0 / 36.0) * SQR(SQR(x)) * SQR(x) + + static_cast(1.0 / 720.0) * SQR(SQR(x)) * CUBE(x) - + static_cast(139.0 / 630.0); } else if (x < FOUR) { - return static_cast(1024.0/315.0) - - static_cast(256.0/45.0) * x + - static_cast(64.0/15.0) * SQR(x) - - static_cast(16.0/9.0) * CUBE(x) + - static_cast(4.0/9.0) * SQR(SQR(x)) - - static_cast(1.0/15.0) * CUBE(x) * SQR(x) + - static_cast(1.0/180.0) * SQR(SQR(x)) * SQR(x) - - static_cast(1.0/5040.0) * SQR(SQR(x)) * CUBE(x); + return static_cast(1024.0 / 315.0) - + static_cast(256.0 / 45.0) * x + + static_cast(64.0 / 15.0) * SQR(x) - + static_cast(16.0 / 9.0) * CUBE(x) + + static_cast(4.0 / 9.0) * SQR(SQR(x)) - + static_cast(1.0 / 15.0) * CUBE(x) * SQR(x) + + static_cast(1.0 / 180.0) * SQR(SQR(x)) * SQR(x) - + static_cast(1.0 / 5040.0) * SQR(SQR(x)) * CUBE(x); } else { return ZERO; } } - Inline real_t S8(const real_t x) - { + // clang-format off + // S8(x) = + // 259723/573440 - (289/1024) * |x|^2 + 
(43/512) * |x|^4 - (1/64) * |x|^6 + (1/576) * |x|^8 if |x| < 1/2 + // 64929/143360 + (1/5120) * |x| - (363/1280) * |x|^2 + (7/1280) * |x|^3 + (9/128) * |x|^4 + // + (7/320) * |x|^5 - (3/80) * |x|^6 + (1/80) * |x|^7 - (1/720) * |x|^8 if 1/2 ≤ |x| ≤ 3/2 + // 145167/286720 - (1457/5120) * |x| + (195/512) * |x|^2 - (1127/1280) * |x|^3 + (207/256) * |x|^4 + // - (119/320) * |x|^5 + (3/32) * |x|^6 - (1/80) * |x|^7 + (1/1440) * |x|^8 if 3/2 < |x| < 2.5 + // (146051/35840) * |x| - (1465/256) * |x|^2 + (5123/1280) * |x|^3 - (209/128) * |x|^4 + // + (131/320) * |x|^5 - (1/16) * |x|^6 + (3/560) * |x|^7 - (1/5040) * |x|^8 - (122729/143360) if 2.5 ≤ |x| < 3.5 + // 4782969/1146880 - (531441/71680) * |x| + (59049/10240) * |x|^2 - (6561/2560) * |x|^3 + (729/1024) * |x|^4 + // - (81/640) * |x|^5 + (9/640) * |x|^6 - (1/1120) * |x|^7 + (1/40320) * |x|^8 if 3.5 ≤ |x| < 4.5 + // 0.0 + // clang-format on + Inline real_t S8(const real_t x) { if (x < HALF) { - return static_cast(259723.0 / 573440.0) - + return static_cast(259723.0 / 573440.0) - static_cast(289.0 / 1024.0) * SQR(x) + - static_cast(43.0 / 512.0) * SQR(SQR(x)) - + static_cast(43.0 / 512.0) * SQR(SQR(x)) - static_cast(1.0 / 64.0) * SQR(SQR(x)) * SQR(x) + static_cast(1.0 / 576.0) * SQR(SQR(SQR(x))); } else if (x <= static_cast(1.5)) { - return static_cast(64929.0/143360.0) + - static_cast(1.0/5120.0) * x - - static_cast(363.0/1280.0) * SQR(x) + - static_cast(7.0/1280.0) * CUBE(x) + - static_cast(9.0/128.0) * SQR(SQR(x)) + + return static_cast(64929.0 / 143360.0) + + static_cast(1.0 / 5120.0) * x - + static_cast(363.0 / 1280.0) * SQR(x) + + static_cast(7.0 / 1280.0) * CUBE(x) + + static_cast(9.0 / 128.0) * SQR(SQR(x)) + static_cast(7.0 / 320.0) * CUBE(x) * SQR(x) - static_cast(3.0 / 80.0) * SQR(CUBE(x)) + static_cast(1.0 / 80.0) * SQR(SQR(x)) * CUBE(x) - static_cast(1.0 / 720.0) * SQR(SQR(SQR(x))); } else if (x < static_cast(2.5)) { - return static_cast(145167.0/286720.0) - - static_cast(1457.0/5120.0) * x + - 
static_cast(195.0/512.0) * SQR(x) - - static_cast(1127.0/1280.0) * CUBE(x) + - static_cast(207.0/256.0) * SQR(SQR(x)) - + return static_cast(145167.0 / 286720.0) - + static_cast(1457.0 / 5120.0) * x + + static_cast(195.0 / 512.0) * SQR(x) - + static_cast(1127.0 / 1280.0) * CUBE(x) + + static_cast(207.0 / 256.0) * SQR(SQR(x)) - static_cast(119.0 / 320.0) * CUBE(x) * SQR(x) + static_cast(3.0 / 32.0) * SQR(CUBE(x)) - static_cast(1.0 / 80.0) * SQR(SQR(x)) * CUBE(x) + static_cast(1.0 / 1440.0) * SQR(SQR(SQR(x))); } else if (x < static_cast(3.5)) { - return static_cast(146051.0/35840.0) * x - - static_cast(1465.0/256.0) * SQR(x) + - static_cast(5123.0/1280.0) * CUBE(x) - - static_cast(209.0/128.0) * SQR(SQR(x)) + + return static_cast(146051.0 / 35840.0) * x - + static_cast(1465.0 / 256.0) * SQR(x) + + static_cast(5123.0 / 1280.0) * CUBE(x) - + static_cast(209.0 / 128.0) * SQR(SQR(x)) + static_cast(131.0 / 320.0) * CUBE(x) * SQR(x) - static_cast(1.0 / 16.0) * SQR(CUBE(x)) + static_cast(3.0 / 560.0) * SQR(SQR(x)) * CUBE(x) - static_cast(1.0 / 5040.0) * SQR(SQR(SQR(x))) - - static_cast(122729.0/143360.0); + static_cast(122729.0 / 143360.0); } else if (x < static_cast(4.5)) { - return static_cast(4782969.0/1146880.0) - - static_cast(531441.0/71680.0) * x + - static_cast(59049.0/10240.0) * SQR(x) - - static_cast(6561.0/2560.0) * CUBE(x) + - static_cast(729.0/1024.0) * SQR(SQR(x)) - + return static_cast(4782969.0 / 1146880.0) - + static_cast(531441.0 / 71680.0) * x + + static_cast(59049.0 / 10240.0) * SQR(x) - + static_cast(6561.0 / 2560.0) * CUBE(x) + + static_cast(729.0 / 1024.0) * SQR(SQR(x)) - static_cast(81.0 / 640.0) * CUBE(x) * SQR(x) + static_cast(9.0 / 640.0) * SQR(CUBE(x)) - static_cast(1.0 / 1120.0) * SQR(SQR(x)) * CUBE(x) + @@ -188,54 +232,66 @@ namespace prtl_shape { } } - Inline real_t S9(const real_t x) - { + // clang-format off + // S9(x) = + // 15619/36288 - (35/144) * |x|^2 + (19/288) * |x|^4 - (5/432) * |x|^6 + (1/576) * |x|^8 - (1/2880) * |x|^9 if |x| ≤ 1 + 
// 7799/18144 + (1/192) * |x| - (19/72) * |x|^2 + (7/144) * |x|^3 - (1/144) * |x|^4 + (7/96) * |x|^5 + // - (13/216) * |x|^6 + (1/48) * |x|^7 - (1/288) * |x|^8 + (1/4320) * |x|^9 if 1 < |x| < 2 + // 1553/2592 - (339/448) * |x| + (635/504) * |x|^2 - (83/48) * |x|^3 + (191/144) * |x|^4 - (19/32) * |x|^5 + // + (35/216) * |x|^6 - (3/112) * |x|^7 + (5/2016) * |x|^8 - (1/10080) * |x|^9 if 2 ≤ |x| < 3 + // (5883/896) * |x| - (2449/288) * |x|^2 + (563/96) * |x|^3 - (1423/576) * |x|^4 + (43/64) * |x|^5 + // - (103/864) * |x|^6 + (3/224) * |x|^7 - (1/1152) * |x|^8 + (1/40320) * |x|^9 - (133663/72576) if 3 ≤ |x| < 4 + // 390625/72576 - (78125/8064) * |x| + (15625/2016) * |x|^2 - (3125/864) * |x|^3 + (625/576) * |x|^4 + // - (125/576) * |x|^5 + (25/864) * |x|^6 - (5/2016) * |x|^7 + (1/8064) * |x|^8 - (1/362880) * |x|^9 if 4 ≤ |x| < 5 + // 0.0 if |x| ≥ 5 + // clang-format on + Inline real_t S9(const real_t x) { if (x <= ONE) { - return static_cast(15619.0 / 36288.0) - + return static_cast(15619.0 / 36288.0) - static_cast(35.0 / 144.0) * SQR(x) + - static_cast(19.0 / 288.0) * SQR(SQR(x)) - + static_cast(19.0 / 288.0) * SQR(SQR(x)) - static_cast(5.0 / 432.0) * SQR(CUBE(x)) + static_cast(1.0 / 576.0) * SQR(SQR(SQR(x))) - static_cast(1.0 / 2880.0) * SQR(SQR(SQR(x))) * x; } else if (x < TWO) { - return static_cast(7799.0/18144.0) + - static_cast(1.0/192.0) * x - - static_cast(19.0/72.0) * SQR(x) + - static_cast(7.0/144.0) * CUBE(x) - - static_cast(1.0/144.0) * SQR(SQR(x)) + + return static_cast(7799.0 / 18144.0) + + static_cast(1.0 / 192.0) * x - + static_cast(19.0 / 72.0) * SQR(x) + + static_cast(7.0 / 144.0) * CUBE(x) - + static_cast(1.0 / 144.0) * SQR(SQR(x)) + static_cast(7.0 / 96.0) * CUBE(x) * SQR(x) - static_cast(13.0 / 216.0) * SQR(CUBE(x)) + static_cast(1.0 / 48.0) * SQR(SQR(x)) * CUBE(x) - static_cast(1.0 / 288.0) * SQR(SQR(SQR(x))) + static_cast(1.0 / 4320.0) * CUBE(CUBE(x)); } else if (x <= THREE) { - return static_cast(1553.0/2592.0) - - static_cast(339.0/448.0) * x + 
- static_cast(635.0/504.0) * SQR(x) - - static_cast(83.0/48.0) * CUBE(x) + - static_cast(191.0/144.0) * SQR(SQR(x)) - + return static_cast(1553.0 / 2592.0) - + static_cast(339.0 / 448.0) * x + + static_cast(635.0 / 504.0) * SQR(x) - + static_cast(83.0 / 48.0) * CUBE(x) + + static_cast(191.0 / 144.0) * SQR(SQR(x)) - static_cast(19.0 / 32.0) * CUBE(x) * SQR(x) + static_cast(35.0 / 216.0) * SQR(CUBE(x)) - static_cast(3.0 / 112.0) * SQR(SQR(x)) * CUBE(x) + static_cast(5.0 / 2016.0) * SQR(SQR(SQR(x))) - static_cast(1.0 / 10080.0) * CUBE(CUBE(x)); } else if (x < FOUR) { - return static_cast(5883.0/896.0) * x - - static_cast(2449.0/288.0) * SQR(x) + - static_cast(563.0/96.0) * CUBE(x) - - static_cast(1423.0/576.0) * SQR(SQR(x)) + - static_cast(43.0/64.0) * CUBE(x) * SQR(x) - - static_cast(103.0/864.0) * SQR(CUBE(x)) + + return static_cast(5883.0 / 896.0) * x - + static_cast(2449.0 / 288.0) * SQR(x) + + static_cast(563.0 / 96.0) * CUBE(x) - + static_cast(1423.0 / 576.0) * SQR(SQR(x)) + + static_cast(43.0 / 64.0) * CUBE(x) * SQR(x) - + static_cast(103.0 / 864.0) * SQR(CUBE(x)) + static_cast(3.0 / 224.0) * SQR(SQR(x)) * CUBE(x) - static_cast(1.0 / 1152.0) * SQR(SQR(SQR(x))) + static_cast(1.0 / 40320.0) * CUBE(CUBE(x)) - - static_cast(133663.0/72576.0); + static_cast(133663.0 / 72576.0); } else if (x < FIVE) { - return static_cast(390625.0/72576.0) - - static_cast(78125.0/8064.0) * x + - static_cast(15625.0/2016.0) * SQR(x) - - static_cast(3125.0/864.0) * CUBE(x) + - static_cast(625.0/576.0) * SQR(SQR(x)) - + return static_cast(390625.0 / 72576.0) - + static_cast(78125.0 / 8064.0) * x + + static_cast(15625.0 / 2016.0) * SQR(x) - + static_cast(3125.0 / 864.0) * CUBE(x) + + static_cast(625.0 / 576.0) * SQR(SQR(x)) - static_cast(125.0 / 576.0) * CUBE(x) * SQR(x) + static_cast(25.0 / 864.0) * SQR(CUBE(x)) - static_cast(5.0 / 2016.0) * SQR(SQR(x)) * CUBE(x) + @@ -284,9 +340,9 @@ namespace prtl_shape { } } else { // compute at i + 1/2 positions i_min = i - 1; - S[0] = HALF * 
SQR(ONE - di); - S[2] = HALF * SQR(di); - S[1] = ONE - S[0] - S[2]; + S[0] = HALF * SQR(ONE - di); + S[2] = HALF * SQR(di); + S[1] = ONE - S[0] - S[2]; } // staggered } else if constexpr (O == 3u) { // 1/6 * ( 4 - 6 * |x|^2 + 3 * |x|^3) |x| < 1 @@ -297,7 +353,7 @@ namespace prtl_shape { S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(di) + THREE * CUBE(di)); - S[3] = static_cast(1.0 / 6.0) * CUBE(di); + S[3] = static_cast(1.0 / 6.0) * CUBE(di); S[2] = ONE - S[0] - S[1] - S[3]; } else { // compute at i + 1/2 positions if (di < HALF) { @@ -305,179 +361,243 @@ namespace prtl_shape { S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(HALF + di) + THREE * CUBE(HALF + di)); - S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); + S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; - S[0] = static_cast(1.0 / 6.0) * CUBE(static_cast(1.5) - di); - S[1] = static_cast(1.0 / 6.0) * + S[0] = static_cast(1.0 / 6.0) * + CUBE(static_cast(1.5) - di); + S[1] = static_cast(1.0 / 6.0) * (FOUR - SIX * SQR(di - HALF) + THREE * CUBE(di - HALF)); - S[3] = static_cast(1.0 / 6.0) * CUBE(di - HALF); + S[3] = static_cast(1.0 / 6.0) * CUBE(di - HALF); S[2] = ONE - S[0] - S[1] - S[3]; } } // staggered } else if constexpr (O == 4u) { - // 5/8 - |x|^2 + 32/45 * |x|^3 - 98/675 * |x|^4 |x| < 3/2 - // S(x) = 1/25 * ( 5/2 - |x|)^4 3/2 ≤ |x| < 5/2 - // 0.0 |x| ≥ 5/2 + // clang-format off + // 115/192 - (5/8) * |x|^2 + (1/4) * |x|^4 |x| < 1/2 + // S(x) = 55/96 + (5/24) * |x| - (5/4) * |x|^2 + (5/6) * |x|^3 - (1/6) * |x|^4 1/2 ≤ |x| < 3/2 + // 625/384 - (125/48) * |x| + (25/16) * |x|^2 - (5/12) * |x|^3 + (1/24) * |x|^4 3/2 ≤ |x| < 5/2 + // 0.0 |x| ≥ 5/2 + // clang-format on if constexpr (not STAGGERED) { // compute at i positions - + if (di < HALF) { i_min = i - 2; - #pragma unroll +#pragma unroll for (int n = 0; n < 5; n++) { S[n] = S4(Kokkos::fabs(TWO + di 
- static_cast(n))); } } else { i_min = i - 1; - #pragma unroll +#pragma unroll for (int n = 0; n < 5; n++) { S[n] = S4(Kokkos::fabs(ONE + di - static_cast(n))); } } } else { // compute at i + 1/2 positions - i_min = i - 2; + i_min = i - 2; - #pragma unroll - for (int n = 0; n < 5; n++) { - S[i] = S4(Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); - } +#pragma unroll + for (int n = 0; n < 5; n++) { + S[i] = S4( + Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); + } } // staggered } else if constexpr (O == 5u) { - // 3/5 - |x|^2 + 5/6 * |x|^3 - 19/72 * |x|^4 + 13/432 * |x|^5 |x| < 2 - // S(x) = 1/135 * (3 - |x|)^5 2 ≤ |x| < 3 - // 0.0 |x| ≥ 3 + // clang-format off + // S5(x) = + // 11/20 - (1/2) * |x|^2 + (1/4) * |x|^4 - (1/12) * |x|^5 if |x| ≤ 1 + // 17/40 + (5/8) * |x| - (7/4) * |x|^2 + (5/4) * |x|^3 - (3/8) * |x|^4 + (1/24) * |x|^5 if 1 < |x| < 2 + // 81/40 - (27/8) * |x| + (9/4) * |x|^2 - (3/4) * |x|^3 + (1/8) * |x|^4 - (1/120) * |x|^5 if 2 ≤ |x| < 3 + // 0.0 if |x| > 3 + // clang-format on if constexpr (not STAGGERED) { // compute at i positions i_min = i - 2; - - #pragma unroll + +#pragma unroll for (int n = 0; n < 6; n++) { - S[n] = S5(Kokkos::fabs(TWO + di - static_cast(n))); + S[n] = S5(Kokkos::fabs(TWO + di - static_cast(n))); } } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 3; - #pragma unroll +#pragma unroll for (int n = 0; n < 6; n++) { - S[n] = S5(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); + S[n] = S5(Kokkos::fabs( + static_cast(2.5) + di - static_cast(n))); } } else { i_min = i - 2; - #pragma unroll +#pragma unroll for (int n = 0; n < 6; n++) { - S[n] = S5(Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); + S[n] = S5(Kokkos::fabs( + static_cast(1.5) + di - static_cast(n))); } } } // staggered } else if constexpr (O == 6u) { + // clang-format off + // S6(x) = + // 5887/11520 - (77/192) * |x|^2 + (7/48) * |x|^4 - (1/36) * |x|^6 if |x| ≤ 1/2 + // 7861/15360 - (7/768) * |x| - (91/256) * |x|^2 - (35/288) 
* |x|^3 + (21/64) * |x|^4 + // - (7/48) * |x|^5 + (1/48) * |x|^6 if 1/2 < |x| < 3/2 + // 1379/7680 + (1267/960) * |x| - (329/128) * |x|^2 + (133/72) * |x|^3 + // - (21/32) * |x|^4 + (7/60) * |x|^5 - (1/120) * |x|^6 if 3/2 ≤ |x| < 5/2 + // 117649/46080 - (16807/3840) * |x| + (2401/768) * |x|^2 - (343/288) * |x|^3 + // + (49/192) * |x|^4 - (7/240) * |x|^5 + (1/720) * |x|^6 if 5/2 ≤ |x| < 7/2 + // 0.0 if |x| ≥ 7/2 + // clang-format on if constexpr (not STAGGERED) { // compute at i positions - + if (di < HALF) { i_min = i - 3; - #pragma unroll +#pragma unroll for (int n = 0; n < 7; n++) { S[n] = S6(Kokkos::fabs(THREE + di - static_cast(n))); } } else { i_min = i - 2; - #pragma unroll +#pragma unroll for (int n = 0; n < 7; n++) { S[n] = S6(Kokkos::fabs(TWO + di - static_cast(n))); } } } else { // compute at i + 1/2 positions - i_min = i - 3; + i_min = i - 3; - #pragma unroll - for (int n = 0; n < 7; n++) { - S[n] = S6(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); - } +#pragma unroll + for (int n = 0; n < 7; n++) { + S[n] = S6( + Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); + } } // staggered } else if constexpr (O == 7u) { + // clang-format off + // S7(x) = + // 151/315 - (1/3) * |x|^2 + (1/9) * |x|^4 - (1/36) * |x|^6 + (1/144) * |x|^7 if |x| < 1 + // 103/210 - (7/90) * |x| - (1/10) * |x|^2 - (7/18) * |x|^3 + (1/2) * |x|^4 + // - (7/30) * |x|^5 + (1/20) * |x|^6 - (1/240) * |x|^7 if 1 ≤ |x| ≤ 2 + // (217/90) * |x| - (23/6) * |x|^2 + (49/18) * |x|^3 - (19/18) * |x|^4 + // + (7/30) * |x|^5 - (1/36) * |x|^6 + (1/720) * |x|^7 - (139/630) if 2 < |x| < 3 + // 1024/315 - (256/45) * |x| + (64/15) * |x|^2 - (16/9) * |x|^3 + (4/9) * |x|^4 + // - (1/15) * |x|^5 + (1/180) * |x|^6 - (1/5040) * |x|^7 if 3 ≤ |x| < 4 + // 0.0 if |x| ≥ 4 + // clang-format on if constexpr (not STAGGERED) { // compute at i positions i_min = i - 3; - - #pragma unroll + +#pragma unroll for (int n = 0; n < 8; n++) { - S[n] = S7(Kokkos::fabs(THREE + di - static_cast(n))); + S[n] = 
S7(Kokkos::fabs(THREE + di - static_cast(n))); } } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 4; for (int n = 0; n < 8; n++) { - S[n] = S7(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); + S[n] = S7(Kokkos::fabs( + static_cast(3.5) + di - static_cast(n))); } } else { i_min = i - 3; - #pragma unroll +#pragma unroll for (int n = 0; n < 8; n++) { - S[n] = S7(Kokkos::fabs(static_cast(2.5) + di - static_cast(n))); + S[n] = S7(Kokkos::fabs( + static_cast(2.5) + di - static_cast(n))); } } } // staggered } else if constexpr (O == 8u) { + // clang-format off + // S8(x) = + // 259723/573440 - (289/1024) * |x|^2 + (43/512) * |x|^4 - (1/64) * |x|^6 + (1/576) * |x|^8 if |x| < 1/2 + // 64929/143360 + (1/5120) * |x| - (363/1280) * |x|^2 + (7/1280) * |x|^3 + (9/128) * |x|^4 + // + (7/320) * |x|^5 - (3/80) * |x|^6 + (1/80) * |x|^7 - (1/720) * |x|^8 if 1/2 ≤ |x| ≤ 3/2 + // 145167/286720 - (1457/5120) * |x| + (195/512) * |x|^2 - (1127/1280) * |x|^3 + (207/256) * |x|^4 + // - (119/320) * |x|^5 + (3/32) * |x|^6 - (1/80) * |x|^7 + (1/1440) * |x|^8 if 3/2 < |x| < 2.5 + // (146051/35840) * |x| - (1465/256) * |x|^2 + (5123/1280) * |x|^3 - (209/128) * |x|^4 + // + (131/320) * |x|^5 - (1/16) * |x|^6 + (3/560) * |x|^7 - (1/5040) * |x|^8 - (122729/143360) if 2.5 ≤ |x| < 3.5 + // 4782969/1146880 - (531441/71680) * |x| + (59049/10240) * |x|^2 - (6561/2560) * |x|^3 + (729/1024) * |x|^4 + // - (81/640) * |x|^5 + (9/640) * |x|^6 - (1/1120) * |x|^7 + (1/40320) * |x|^8 if 3.5 ≤ |x| < 4.5 + // 0.0 + // clang-format on if constexpr (not STAGGERED) { // compute at i positions if (di < HALF) { i_min = i - 4; - #pragma unroll +#pragma unroll for (int n = 0; n < 9; n++) { S[n] = S8(Kokkos::fabs(FOUR + di - static_cast(n))); } } else { i_min = i - 3; - #pragma unroll +#pragma unroll for (int n = 0; n < 9; n++) { S[n] = S8(Kokkos::fabs(THREE + di - static_cast(n))); } } } else { // compute at i + 1/2 positions - i_min = i - 4; + i_min = i - 4; - #pragma unroll - for (int n = 
0; n < 9; n++) { - S[n] = S8(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); - } +#pragma unroll + for (int n = 0; n < 9; n++) { + S[n] = S8( + Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); + } } // staggered } else if constexpr (O == 9u) { + // clang-format off + // S9(x) = + // 15619/36288 - (35/144) * |x|^2 + (19/288) * |x|^4 - (5/432) * |x|^6 + (1/576) * |x|^8 - (1/2880) * |x|^9 if |x| ≤ 1 + // 7799/18144 + (1/192) * |x| - (19/72) * |x|^2 + (7/144) * |x|^3 - (1/144) * |x|^4 + (7/96) * |x|^5 + // - (13/216) * |x|^6 + (1/48) * |x|^7 - (1/288) * |x|^8 + (1/4320) * |x|^9 if 1 < |x| < 2 + // 1553/2592 - (339/448) * |x| + (635/504) * |x|^2 - (83/48) * |x|^3 + (191/144) * |x|^4 - (19/32) * |x|^5 + // + (35/216) * |x|^6 - (3/112) * |x|^7 + (5/2016) * |x|^8 - (1/10080) * |x|^9 if 2 ≤ |x| < 3 + // (5883/896) * |x| - (2449/288) * |x|^2 + (563/96) * |x|^3 - (1423/576) * |x|^4 + (43/64) * |x|^5 + // - (103/864) * |x|^6 + (3/224) * |x|^7 - (1/1152) * |x|^8 + (1/40320) * |x|^9 - (133663/72576) if 3 ≤ |x| < 4 + // 390625/72576 - (78125/8064) * |x| + (15625/2016) * |x|^2 - (3125/864) * |x|^3 + (625/576) * |x|^4 + // - (125/576) * |x|^5 + (25/864) * |x|^6 - (5/2016) * |x|^7 + (1/8064) * |x|^8 - (1/362880) * |x|^9 if 4 ≤ |x| < 5 + // 0.0 if |x| ≥ 5 + // clang-format on if constexpr (not STAGGERED) { // compute at i positions i_min = i - 4; - - #pragma unroll + +#pragma unroll for (int n = 0; n < 10; n++) { - S[n] = S9(Kokkos::fabs(FOUR + di - static_cast(n))); + S[n] = S9(Kokkos::fabs(FOUR + di - static_cast(n))); } } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 5; for (int n = 0; n < 10; n++) { - S[n] = S9(Kokkos::fabs(static_cast(4.5) + di - static_cast(n))); + S[n] = S9(Kokkos::fabs( + static_cast(4.5) + di - static_cast(n))); } } else { i_min = i - 4; - #pragma unroll +#pragma unroll for (int n = 0; n < 10; n++) { - S[n] = S9(Kokkos::fabs(static_cast(3.5) + di - static_cast(n))); + S[n] = S9(Kokkos::fabs( + static_cast(3.5) + di - 
static_cast(n))); } } } // staggered } else { - raise::KernelError(HERE, "Unsupported interpolation order"); + raise::KernelError(HERE, "Unsupported interpolation order. O > 9 not supported. Seriously. What are you even doing here?"); } } - template Inline void for_deposit(const int& i_init, const real_t& di_init, From aa500e3dd2003891c5145c8525e40c5582621eed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20B=C3=B6ss?= Date: Thu, 11 Sep 2025 15:32:06 -0500 Subject: [PATCH 071/154] first attempt to give shape function order at compile time --- CMakeLists.txt | 14 ++++++++++++++ cmake/config.cmake | 15 +++++++++++++++ cmake/defaults.cmake | 3 +++ cmake/report.cmake | 12 ++++++++++++ src/engines/srpic.hpp | 26 +++----------------------- src/global/global.h | 8 +++++++- src/kernels/particle_pusher_sr.hpp | 12 +++++------- src/kernels/particle_shapes.hpp | 1 - 8 files changed, 59 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4cff5b41b..1afa1eefa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,6 +34,11 @@ set(DEBUG set(precision ${default_precision} CACHE STRING "Precision") + +set(shapefunction + ${default_shapefunction} + CACHE STRING "Shape function") + set(pgen ${default_pgen} CACHE STRING "Problem generator") @@ -75,6 +80,14 @@ set(precisions "single" "double" CACHE STRING "Precisions") +set(shapefunctions + "0" "1" "2" "3" "4" "5" "6" "7" "8" "9" + CACHE STRING "Shape functions") + +set(deposits + "Zig-zag" "Esirkepov" + CACHE STRING "Deposits") + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.cmake) # ------------------------- Third-Party Tests ------------------------------ # @@ -92,6 +105,7 @@ include_directories(${plog_SRC}/include) # -------------------------------- Main code ------------------------------- # set_precision(${precision}) +set_shapefunction(${shapefunction}) if("${Kokkos_DEVICES}" MATCHES "CUDA") add_compile_options("-D CUDA_ENABLED") set(DEVICE_ENABLED ON) diff --git a/cmake/config.cmake 
b/cmake/config.cmake index 97ed658e3..cf2941abb 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -16,6 +16,21 @@ function(set_precision precision_name) endif() endfunction() +# -------------------------------- Shape function ------------------------------- # +function(set_shapefunction shapefunction) + list(FIND shapefunctions ${shapefunction} SHAPEFUNCTION_FOUND) + + if(${SHAPEFUNCTION_FOUND} EQUAL -1) + message( + FATAL_ERROR + "Invalid shape function order: ${shapefunction}\nValid options are: ${shapefunctions}" + ) + endif() + + add_compile_options("-DN_ORDER=${shapefunction}") + +endfunction() + # ---------------------------- Problem generator --------------------------- # function(set_problem_generator pgen_name) if(pgen_name STREQUAL ".") diff --git a/cmake/defaults.cmake b/cmake/defaults.cmake index 2bfa9a61c..9e3da4cbe 100644 --- a/cmake/defaults.cmake +++ b/cmake/defaults.cmake @@ -19,6 +19,9 @@ set(default_engine set(default_precision "single" CACHE INTERNAL "Default precision") +set(default_shapefunction + 0 + CACHE INTERNAL "Default shape function") set(default_pgen "." 
CACHE INTERNAL "Default problem generator") diff --git a/cmake/report.cmake b/cmake/report.cmake index 33443d298..397d656d7 100644 --- a/cmake/report.cmake +++ b/cmake/report.cmake @@ -37,6 +37,15 @@ printchoices( "${Blue}" PRECISION_REPORT 46) +printchoices( + "Shapefunction Order" + "shapefunction" + "${shapefunctions}" + ${shapefunction} + ${default_shapefunction} + "${Blue}" + SHAPEFUNCTION_REPORT + 46) printchoices( "Output" "output" @@ -113,6 +122,9 @@ string( ${PRECISION_REPORT} "\n" " " + ${SHAPEFUNCTION_REPORT} + "\n" + " " ${OUTPUT_REPORT} "\n") diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index c0ba54c31..50d42e3ca 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -543,29 +543,9 @@ namespace ntt { species.npart(), (double)species.charge()), HERE); - if (shape_order == 0) { - deposit_with<0u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 1) { - deposit_with<1u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 2) { - deposit_with<2u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 3) { - deposit_with<3u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 4) { - deposit_with<4u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 5) { - deposit_with<5u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 6) { - deposit_with<6u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 7) { - deposit_with<7u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 8) { - deposit_with<8u>(species, domain.mesh.metric, scatter_cur, dt); - } else if (shape_order == 9) { - deposit_with<9u>(species, domain.mesh.metric, scatter_cur, dt); - } else { - raise::Error("Invalid shape order for current deposition", HERE); - } + + deposit_with(species, domain.mesh.metric, scatter_cur, dt); + } Kokkos::Experimental::contribute(domain.fields.cur, scatter_cur); 
} diff --git a/src/global/global.h b/src/global/global.h index adffcf6e9..52a4feae6 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -113,7 +113,13 @@ namespace files { namespace ntt { - inline constexpr std::size_t N_GHOSTS = 2; +#ifndef N_ORDER +#define N_ORDER 0 +inline constexpr std::size_t N_GHOSTS = 2; +#else // N_ORDER + inline constexpr std::size_t N_GHOSTS = static_cast((N_ORDER + 1)/2) + 1; +#endif // N_ORDER + // Coordinate shift to account for ghost cells #define COORD(I) \ (static_cast(static_cast((I)) - static_cast(N_GHOSTS))) diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index bf4cfd2d6..f981e0586 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -477,8 +477,8 @@ namespace kernel::sr { vec_t ei_Cart_rad { ZERO }, bi_Cart_rad { ZERO }; bool is_gca { false }; - // field interpolation 1st-6th order - getInterpFlds(p, ei, bi); + // field interpolation 0th-9th order + getInterpFlds(p, ei, bi); metric.template transform_xyz(xp_Cd, ei, ei_Cart); metric.template transform_xyz(xp_Cd, bi, bi_Cart); @@ -830,14 +830,12 @@ namespace kernel::sr { } } + template Inline void getInterpFlds(index_t& p, vec_t& e0, vec_t& b0) const { - // ToDo: implement template in srpic.hpp - const unsigned int O = 1u; - - // ToDo: change to 1u! 
+ // Zig-zag interpolation if constexpr (O == 0u) { if constexpr (D == Dim::_1D) { @@ -1099,7 +1097,7 @@ namespace kernel::sr { c1 = c01 * ponpmy + c11 * ponppy; b0[2] = c0 * ponpmz + c1 * ponppz; } - } else if constexpr ((O >= 1u) and (O <= 5u)) { + } else if constexpr (O >= 1u) { if constexpr (D == Dim::_1D) { const int i { i1(p) + static_cast(N_GHOSTS) }; diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 0109ba949..da2f5169a 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -612,7 +612,6 @@ namespace prtl_shape { The N-th order shape function per particle is a N+2 element array where the shape function contributes to only N+1 elements. We need to find which indices are contributing to the shape function - For this we first compute the indices of the particle position Let * be the particle position at the current timestep Let x be the particle position at the previous timestep From 578cec2e3a5e47181a6846f166e00e9b05a158e8 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 11 Sep 2025 13:45:42 -0700 Subject: [PATCH 072/154] cmake for shape funcs --- CMakeLists.txt | 16 +++++++++------- cmake/config.cmake | 19 +++++++------------ cmake/report.cmake | 10 +++++----- src/engines/srpic.hpp | 3 +-- src/global/global.h | 14 ++++++++------ 5 files changed, 30 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1afa1eefa..5c3b2710d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,14 +80,16 @@ set(precisions "single" "double" CACHE STRING "Precisions") -set(shapefunctions - "0" "1" "2" "3" "4" "5" "6" "7" "8" "9" - CACHE STRING "Shape functions") - -set(deposits - "Zig-zag" "Esirkepov" +set(deposit + "zigzag" "esirkepov" CACHE STRING "Deposits") +if(${deposit} STREQUAL "esirkepov") + set(shape_order + 1 + CACHE INTEGER "Shape functions") +endif() + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.cmake) # ------------------------- Third-Party Tests 
------------------------------ # @@ -105,7 +107,7 @@ include_directories(${plog_SRC}/include) # -------------------------------- Main code ------------------------------- # set_precision(${precision}) -set_shapefunction(${shapefunction}) +set_shape_order(${shape_order}) if("${Kokkos_DEVICES}" MATCHES "CUDA") add_compile_options("-D CUDA_ENABLED") set(DEVICE_ENABLED ON) diff --git a/cmake/config.cmake b/cmake/config.cmake index cf2941abb..8324957b8 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -16,19 +16,14 @@ function(set_precision precision_name) endif() endfunction() -# -------------------------------- Shape function ------------------------------- # -function(set_shapefunction shapefunction) - list(FIND shapefunctions ${shapefunction} SHAPEFUNCTION_FOUND) - - if(${SHAPEFUNCTION_FOUND} EQUAL -1) - message( - FATAL_ERROR - "Invalid shape function order: ${shapefunction}\nValid options are: ${shapefunctions}" - ) +# ------------------------------- Shape function --------------------------- # +function(set_shape_order shape_order) + if(${deposit} STREQUAL "esirkepov") + if(${shape_order} GREATER 9) + message(FATAL_ERROR "Shape order must be between 1 and 9") + endif() + add_compile_options("-DSHAPE_ORDER=${shape_order}") endif() - - add_compile_options("-DN_ORDER=${shapefunction}") - endfunction() # ---------------------------- Problem generator --------------------------- # diff --git a/cmake/report.cmake b/cmake/report.cmake index 397d656d7..94ba9f65f 100644 --- a/cmake/report.cmake +++ b/cmake/report.cmake @@ -38,11 +38,11 @@ printchoices( PRECISION_REPORT 46) printchoices( - "Shapefunction Order" - "shapefunction" - "${shapefunctions}" - ${shapefunction} - ${default_shapefunction} + "Shape order" + "shape_order" + "" + ${shape_order} + "1" "${Blue}" SHAPEFUNCTION_REPORT 46) diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 50d42e3ca..ef6263606 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -543,9 +543,8 @@ 
namespace ntt { species.npart(), (double)species.charge()), HERE); - - deposit_with(species, domain.mesh.metric, scatter_cur, dt); + deposit_with(species, domain.mesh.metric, scatter_cur, dt); } Kokkos::Experimental::contribute(domain.fields.cur, scatter_cur); } diff --git a/src/global/global.h b/src/global/global.h index 52a4feae6..f6d5b13dc 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -113,12 +113,14 @@ namespace files { namespace ntt { -#ifndef N_ORDER -#define N_ORDER 0 -inline constexpr std::size_t N_GHOSTS = 2; -#else // N_ORDER - inline constexpr std::size_t N_GHOSTS = static_cast((N_ORDER + 1)/2) + 1; -#endif // N_ORDER +#if !defined(SHAPE_ORDER) + #define SHAPE_ORDER 0 + inline constexpr std::size_t N_GHOSTS = 2; +#else // SHAPE_ORDER + inline constexpr std::size_t N_GHOSTS = static_cast( + (SHAPE_ORDER + 1) / 2) + + 1; +#endif // SHAPE_ORDER // Coordinate shift to account for ghost cells #define COORD(I) \ From a4f75de9b415be4bd73ee9f56aa800ea7d24c5db Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 11 Sep 2025 16:18:57 -0500 Subject: [PATCH 073/154] fix cmake options for deposit and shape order --- CMakeLists.txt | 20 ++++++++++++++------ cmake/defaults.cmake | 9 ++++++--- cmake/report.cmake | 16 ++++++++++++++-- src/kernels/particle_pusher_sr.hpp | 2 +- 4 files changed, 35 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c3b2710d..76a52f29d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,8 +35,12 @@ set(precision ${default_precision} CACHE STRING "Precision") -set(shapefunction - ${default_shapefunction} +set(deposit + ${default_deposit} + CACHE STRING "Deposit") + +set(shape_order + ${default_shape_order} CACHE STRING "Shape function") set(pgen @@ -80,16 +84,20 @@ set(precisions "single" "double" CACHE STRING "Precisions") -set(deposit +set(deposits "zigzag" "esirkepov" CACHE STRING "Deposits") -if(${deposit} STREQUAL "esirkepov") +if(${deposit} STREQUAL "zigzag") set(shape_order - 1 - 
CACHE INTEGER "Shape functions") + ${default_shape_order} + CACHE STRING "Shape functions") endif() +set(shape_orders + "1" "2" "3" "4" "5" "6" "7" "8" "9" + CACHE STRING "Shape orders") + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.cmake) # ------------------------- Third-Party Tests ------------------------------ # diff --git a/cmake/defaults.cmake b/cmake/defaults.cmake index 9e3da4cbe..fb8790019 100644 --- a/cmake/defaults.cmake +++ b/cmake/defaults.cmake @@ -19,9 +19,12 @@ set(default_engine set(default_precision "single" CACHE INTERNAL "Default precision") -set(default_shapefunction - 0 - CACHE INTERNAL "Default shape function") +set(default_deposit + "zigzag" + CACHE INTERNAL "Default deposit") +set(default_shape_order + 1 + CACHE INTERNAL "Default shape function order") set(default_pgen "." CACHE INTERNAL "Default problem generator") diff --git a/cmake/report.cmake b/cmake/report.cmake index 94ba9f65f..8f62ac17b 100644 --- a/cmake/report.cmake +++ b/cmake/report.cmake @@ -37,12 +37,21 @@ printchoices( "${Blue}" PRECISION_REPORT 46) +printchoices( + "Deposit" + "deposit" + "${deposits}" + ${deposit} + ${default_deposit} + "${Blue}" + DEPOSIT_REPORT + 46) printchoices( "Shape order" "shape_order" - "" + "${shape_orders}" ${shape_order} - "1" + ${default_shape_order} "${Blue}" SHAPEFUNCTION_REPORT 46) @@ -122,6 +131,9 @@ string( ${PRECISION_REPORT} "\n" " " + ${DEPOSIT_REPORT} + "\n" + " " ${SHAPEFUNCTION_REPORT} "\n" " " diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index f981e0586..2707bb805 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -478,7 +478,7 @@ namespace kernel::sr { bool is_gca { false }; // field interpolation 0th-9th order - getInterpFlds(p, ei, bi); + getInterpFlds(p, ei, bi); metric.template transform_xyz(xp_Cd, ei, ei_Cart); metric.template transform_xyz(xp_Cd, bi, bi_Cart); From b2058d9c9530ef6f771219261ad0828a4144a75b Mon Sep 17 00:00:00 2001 From: 
haykh Date: Thu, 11 Sep 2025 16:42:55 -0700 Subject: [PATCH 074/154] comm fields in shock setup --- pgens/shock/pgen.hpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pgens/shock/pgen.hpp b/pgens/shock/pgen.hpp index 6bd6f21a9..9bce6f44a 100644 --- a/pgens/shock/pgen.hpp +++ b/pgens/shock/pgen.hpp @@ -67,7 +67,6 @@ namespace user { const real_t Btheta, Bphi, Vx, Bmag; }; - template struct PGen : public arch::ProblemGenerator { // compatibility traits for the problem generator @@ -82,6 +81,8 @@ namespace user { using arch::ProblemGenerator::C; using arch::ProblemGenerator::params; + const Metadomain& global_domain; + // domain properties const real_t global_xmin, global_xmax; // gas properties @@ -95,6 +96,7 @@ namespace user { inline PGen(const SimulationParams& p, const Metadomain& global_domain) : arch::ProblemGenerator { p } + , global_domain { global_domain } , global_xmin { global_domain.mesh().extent(in::x1).first } , global_xmax { global_domain.mesh().extent(in::x1).second } , drift_ux { p.template get("setup.drift_ux") } @@ -116,8 +118,8 @@ namespace user { return init_flds; } - auto FixFieldsConst(const bc_in&, const em& comp) const - -> std::pair { + auto FixFieldsConst(const bc_in&, + const em& comp) const -> std::pair { if (comp == em::ex1) { return { init_flds.ex1({ ZERO }), true }; } else if (comp == em::ex2) { @@ -176,7 +178,7 @@ namespace user { const auto energy_dist = arch::TwoTemperatureMaxwellian( local_domain.mesh.metric, local_domain.random_pool, - { temperature_ratio * temperature * local_domain.species[1].mass() , + { temperature_ratio * temperature * local_domain.species[1].mass(), temperature }, { 1, 2 }, -drift_ux, @@ -266,6 +268,7 @@ namespace user { domain.fields.em, init_flds, domain.mesh.metric }); + global_domain.CommunicateFields(domain, Comm::E | Comm::B); /* tag particles inside the injection zone as dead @@ -318,8 +321,7 @@ namespace user { const auto energy_dist = 
arch::TwoTemperatureMaxwellian( domain.mesh.metric, domain.random_pool, - { temperature_ratio * temperature * domain.species[1].mass(), - temperature }, + { temperature_ratio * temperature * domain.species[1].mass(), temperature }, { 1, 2 }, -drift_ux, in::x1); From a4a0044c8ca7210343cafd0f986fa73bcddf5282 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 11 Sep 2025 16:47:10 -0700 Subject: [PATCH 075/154] metadomain correctly passed to shock pgen --- pgens/shock/pgen.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pgens/shock/pgen.hpp b/pgens/shock/pgen.hpp index 9bce6f44a..a93d6891e 100644 --- a/pgens/shock/pgen.hpp +++ b/pgens/shock/pgen.hpp @@ -81,7 +81,7 @@ namespace user { using arch::ProblemGenerator::C; using arch::ProblemGenerator::params; - const Metadomain& global_domain; + Metadomain& global_domain; // domain properties const real_t global_xmin, global_xmax; @@ -94,7 +94,7 @@ namespace user { real_t Btheta, Bphi, Bmag; InitFields init_flds; - inline PGen(const SimulationParams& p, const Metadomain& global_domain) + inline PGen(const SimulationParams& p, Metadomain& global_domain) : arch::ProblemGenerator { p } , global_domain { global_domain } , global_xmin { global_domain.mesh().extent(in::x1).first } From 7c192020b16f4789a6a647a4cc7366464eca403a Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Fri, 12 Sep 2025 18:59:28 -0500 Subject: [PATCH 076/154] bugfix in 3rd order shape function i_min for not staggered case --- src/kernels/particle_shapes.hpp | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index da2f5169a..d35c77d67 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -323,9 +323,9 @@ namespace prtl_shape { } } // staggered } else if constexpr (O == 2u) { - // 3/4 - |x|^2 |x| < 1/2 + // 3/4 - |x|^2 |x| < 1/2 // S(x) = 1/2 * (3/2 - |x|)^2 1/2 ≤ |x| < 3/2 - // 0.0 |x| ≥ 3/2 + // 
0.0 |x| ≥ 3/2 if constexpr (not STAGGERED) { // compute at i positions if (di < HALF) { i_min = i - 1; @@ -345,30 +345,29 @@ namespace prtl_shape { S[1] = ONE - S[0] - S[2]; } // staggered } else if constexpr (O == 3u) { - // 1/6 * ( 4 - 6 * |x|^2 + 3 * |x|^3) |x| < 1 - // S(x) = 1/6 * ( 2 - |x|)^3 1 ≤ |x| < 2 - // 0.0 |x| ≥ 2 + // 2/3 - x^2 + 1/2 * x^3 |x| < 1 + // S(x) = 1/6 * (2 - |x|)^3 1 ≤ |x| < 2 + // 0.0 |x| ≥ 2 if constexpr (not STAGGERED) { // compute at i positions - i_min = i - 2; + i_min = i - 1; S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); - S[1] = static_cast(1.0 / 6.0) * - (FOUR - SIX * SQR(di) + THREE * CUBE(di)); - S[3] = static_cast(1.0 / 6.0) * CUBE(di); - S[2] = ONE - S[0] - S[1] - S[3]; + S[1] = static_cast(2.0 / 3.0) - SQR(di) + HALF * CUBE(di); + S[3] = static_cast(1.0 / 6.0) * CUBE(di); + S[2] = ONE - S[0] - S[1] - S[3]; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 2; S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); - S[1] = static_cast(1.0 / 6.0) * - (FOUR - SIX * SQR(HALF + di) + THREE * CUBE(HALF + di)); + S[1] = static_cast(2.0 / 3.0) - SQR(HALF + di) + + HALF * CUBE(HALF + di); S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; S[0] = static_cast(1.0 / 6.0) * CUBE(static_cast(1.5) - di); - S[1] = static_cast(1.0 / 6.0) * - (FOUR - SIX * SQR(di - HALF) + THREE * CUBE(di - HALF)); + S[1] = static_cast(2.0 / 3.0) - SQR(HALF - di) + + HALF * CUBE(HALF - di); S[3] = static_cast(1.0 / 6.0) * CUBE(di - HALF); S[2] = ONE - S[0] - S[1] - S[3]; } @@ -527,7 +526,7 @@ namespace prtl_shape { // + (131/320) * |x|^5 - (1/16) * |x|^6 + (3/560) * |x|^7 - (1/5040) * |x|^8 - (122729/143360) if 2.5 ≤ |x| < 3.5 // 4782969/1146880 - (531441/71680) * |x| + (59049/10240) * |x|^2 - (6561/2560) * |x|^3 + (729/1024) * |x|^4 // - (81/640) * |x|^5 + (9/640) * |x|^6 - (1/1120) * |x|^7 + (1/40320) * |x|^8 if 3.5 ≤ |x| < 4.5 - // 0.0 + // 0.0 // clang-format on if constexpr (not 
STAGGERED) { // compute at i positions if (di < HALF) { From 58950c5bc7481371408c22a2802d1a2ed157316f Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 17 Sep 2025 18:07:47 -0500 Subject: [PATCH 077/154] simplifications for 3rd order shape functions --- src/global/utils/numeric.h | 6 ++- src/kernels/particle_shapes.hpp | 73 +++++++++++++++++++++++++-------- 2 files changed, 60 insertions(+), 19 deletions(-) diff --git a/src/global/utils/numeric.h b/src/global/utils/numeric.h index 3b7b9f554..856ccb838 100644 --- a/src/global/utils/numeric.h +++ b/src/global/utils/numeric.h @@ -41,7 +41,8 @@ inline constexpr float TWELVE = 12.0f; inline constexpr float ZERO = 0.0f; inline constexpr float HALF = 0.5f; inline constexpr float THIRD = 0.333333f; -inline constexpr float THREE_FOURTHS = 0.75f; +inline constexpr float THREE_FOURTHS = 0.75f; +inline constexpr float THREE_HALFS = 1.5f; inline constexpr float INV_2 = 0.5f; inline constexpr float INV_4 = 0.25f; inline constexpr float INV_8 = 0.125f; @@ -59,7 +60,8 @@ inline constexpr double TWELVE = 12.0; inline constexpr double ZERO = 0.0; inline constexpr double HALF = 0.5; inline constexpr double THIRD = 0.3333333333333333; -inline constexpr double THREE_FOURTHS = 0.75; +inline constexpr double THREE_FOURTHS = 0.75; +inline constexpr float THREE_HALFS = 1.5; inline constexpr double INV_2 = 0.5; inline constexpr double INV_4 = 0.25; inline constexpr double INV_8 = 0.125; diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index d35c77d67..ddf852661 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -17,6 +17,23 @@ namespace prtl_shape { + // clang-format off + // 115/192 - (5/8) * |x|^2 + (1/4) * |x|^4 |x| < 1/2 + // S(x) = 55/96 + (5/24) * |x| - (5/4) * |x|^2 + (5/6) * |x|^3 - (1/6) * |x|^4 1/2 ≤ |x| < 3/2 + // 625/384 - (125/48) * |x| + (25/16) * |x|^2 - (5/12) * |x|^3 + (1/24) * |x|^4 3/2 ≤ |x| < 5/2 + // 0.0 |x| ≥ 5/2 + // clang-format on + Inline real_t 
S3(const real_t x) { + if (x < ONE) { + return static_cast(2.0 / 3.0) - SQR(x) + HALF * CUBE(x); + } else if (x < TWO) { + return static_cast(4.0 / 3.0) - TWO * x + SQR(x) - + static_cast(1.0 / 6.0) * CUBE(x); + } else { + return ZERO; + } + } + // clang-format off // 115/192 - (5/8) * |x|^2 + (1/4) * |x|^4 |x| < 1/2 // S(x) = 55/96 + (5/24) * |x| - (5/4) * |x|^2 + (5/6) * |x|^3 - (1/6) * |x|^4 1/2 ≤ |x| < 3/2 @@ -27,7 +44,7 @@ namespace prtl_shape { if (x < HALF) { return static_cast(115.0 / 192.0) - static_cast(5.0 / 8.0) * SQR(x) + INV_4 * SQR(SQR(x)); - } else if (x < static_cast(1.5)) { + } else if (x < THREE_HALFS) { return static_cast(55.0 / 96.0) + static_cast(5.0 / 24.0) * x - static_cast(5.0 / 4.0) * SQR(x) + @@ -51,6 +68,7 @@ namespace prtl_shape { // 81/40 - (27/8) * |x| + (9/4) * |x|^2 - (3/4) * |x|^3 + (1/8) * |x|^4 - (1/120) * |x|^5 if 2 ≤ |x| < 3 // 0.0 if |x| > 3 // clang-format on + Inline real_t S5(const real_t x) { if (x <= ONE) { return static_cast(11.0 / 20.0) - HALF * SQR(x) + @@ -90,7 +108,7 @@ namespace prtl_shape { static_cast(77.0 / 192.0) * SQR(x) + static_cast(7.0 / 48.0) * SQR(SQR(x)) - static_cast(1.0 / 36.0) * SQR(CUBE(x)); - } else if (x < static_cast(1.5)) { + } else if (x < THREE_HALFS) { return static_cast(7861.0 / 15360.0) - static_cast(7.0 / 768.0) * x - static_cast(91.0 / 256.0) * SQR(x) - @@ -187,7 +205,7 @@ namespace prtl_shape { static_cast(43.0 / 512.0) * SQR(SQR(x)) - static_cast(1.0 / 64.0) * SQR(SQR(x)) * SQR(x) + static_cast(1.0 / 576.0) * SQR(SQR(SQR(x))); - } else if (x <= static_cast(1.5)) { + } else if (x <= THREE_HALFS) { return static_cast(64929.0 / 143360.0) + static_cast(1.0 / 5120.0) * x - static_cast(363.0 / 1280.0) * SQR(x) + @@ -318,7 +336,7 @@ namespace prtl_shape { S[1] = ONE - S[0]; } else { i_min = i; - S[0] = static_cast(1.5) - di; + S[0] = THREE_HALFS - di; S[1] = ONE - S[0]; } } // staggered @@ -334,7 +352,7 @@ namespace prtl_shape { S[2] = ONE - S[0] - S[1]; } else { i_min = i; - S[0] = HALF * 
SQR(static_cast(3.0 / 2.0) - di); + S[0] = HALF * SQR(THREE_HALFS - di); S[1] = THREE_FOURTHS - SQR(ONE - di); S[2] = ONE - S[0] - S[1]; } @@ -352,26 +370,49 @@ namespace prtl_shape { i_min = i - 1; S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); S[1] = static_cast(2.0 / 3.0) - SQR(di) + HALF * CUBE(di); - S[3] = static_cast(1.0 / 6.0) * CUBE(di); + S[3] = static_cast(1.0 / 6.0) * CUBE(FOUR - di); S[2] = ONE - S[0] - S[1] - S[3]; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 2; S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); - S[1] = static_cast(2.0 / 3.0) - SQR(HALF + di) + - HALF * CUBE(HALF + di); - S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); + S[1] = static_cast(2.0 / 3.0) - SQR(THREE_HALFS + di) + + HALF * CUBE(THREE_HALFS + di); + S[3] = static_cast(1.0 / 6.0) * + CUBE(static_cast(3.5) - di); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; - S[0] = static_cast(1.0 / 6.0) * - CUBE(static_cast(1.5) - di); - S[1] = static_cast(2.0 / 3.0) - SQR(HALF - di) + + S[0] = static_cast(1.0 / 6.0) * CUBE(THREE_HALFS - di); + S[1] = static_cast(2.0 / 3.0) - SQR(di - HALF) + HALF * CUBE(HALF - di); - S[3] = static_cast(1.0 / 6.0) * CUBE(di - HALF); + S[3] = static_cast(1.0 / 6.0) * + CUBE(static_cast(2.5) - di); S[2] = ONE - S[0] - S[1] - S[3]; } } // staggered + + // if constexpr (not STAGGERED) { // compute at i positions + // i_min = i - 1; + // #pragma unroll + // for (int n = 0; n < 4; n++) { + // S[n] = S3(Kokkos::fabs(ONE + di - static_cast(n))); + // } + // } else { // compute at i + 1/2 positions + // if (di < HALF) { + // i_min = i - 2; + // #pragma unroll + // for (int n = 0; n < 4; n++) { + // S[n] = S3(Kokkos::fabs(THREE_HALFS + di - static_cast(n))); + // } + // } else { + // i_min = i - 1; + // #pragma unroll + // for (int n = 0; n < 4; n++) { + // S[n] = S3(Kokkos::fabs(HALF + di - static_cast(n))); + // } + // } + // } // staggered } else if constexpr (O == 4u) { // clang-format off // 115/192 - (5/8) * |x|^2 + (1/4) * 
|x|^4 |x| < 1/2 @@ -401,8 +442,7 @@ namespace prtl_shape { #pragma unroll for (int n = 0; n < 5; n++) { - S[i] = S4( - Kokkos::fabs(static_cast(1.5) + di - static_cast(n))); + S[i] = S4(Kokkos::fabs(THREE_HALFS + di - static_cast(n))); } } // staggered } else if constexpr (O == 5u) { @@ -434,8 +474,7 @@ namespace prtl_shape { #pragma unroll for (int n = 0; n < 6; n++) { - S[n] = S5(Kokkos::fabs( - static_cast(1.5) + di - static_cast(n))); + S[n] = S5(Kokkos::fabs(THREE_HALFS + di - static_cast(n))); } } } // staggered From 5025936fca03c5e3ea5be30262ba8e948209c635 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 18 Sep 2025 21:50:42 -0500 Subject: [PATCH 078/154] bugfix and optimisation for S3 --- src/kernels/particle_shapes.hpp | 34 ++++++--------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index ddf852661..2615b4310 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -370,49 +370,27 @@ namespace prtl_shape { i_min = i - 1; S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); S[1] = static_cast(2.0 / 3.0) - SQR(di) + HALF * CUBE(di); - S[3] = static_cast(1.0 / 6.0) * CUBE(FOUR - di); + S[3] = static_cast(1.0 / 6.0) * CUBE(di); S[2] = ONE - S[0] - S[1] - S[3]; } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 2; S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); - S[1] = static_cast(2.0 / 3.0) - SQR(THREE_HALFS + di) + - HALF * CUBE(THREE_HALFS + di); + S[1] = static_cast(2.0 / 3.0) - SQR(HALF + di) + + HALF * CUBE(HALF + di); S[3] = static_cast(1.0 / 6.0) * - CUBE(static_cast(3.5) - di); + CUBE(HALF + di); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; S[0] = static_cast(1.0 / 6.0) * CUBE(THREE_HALFS - di); S[1] = static_cast(2.0 / 3.0) - SQR(di - HALF) + - HALF * CUBE(HALF - di); + HALF * CUBE(di - HALF); S[3] = static_cast(1.0 / 6.0) * - CUBE(static_cast(2.5) - di); + CUBE(HALF - di); S[2] = ONE - 
S[0] - S[1] - S[3]; } } // staggered - - // if constexpr (not STAGGERED) { // compute at i positions - // i_min = i - 1; - // #pragma unroll - // for (int n = 0; n < 4; n++) { - // S[n] = S3(Kokkos::fabs(ONE + di - static_cast(n))); - // } - // } else { // compute at i + 1/2 positions - // if (di < HALF) { - // i_min = i - 2; - // #pragma unroll - // for (int n = 0; n < 4; n++) { - // S[n] = S3(Kokkos::fabs(THREE_HALFS + di - static_cast(n))); - // } - // } else { - // i_min = i - 1; - // #pragma unroll - // for (int n = 0; n < 4; n++) { - // S[n] = S3(Kokkos::fabs(HALF + di - static_cast(n))); - // } - // } - // } // staggered } else if constexpr (O == 4u) { // clang-format off // 115/192 - (5/8) * |x|^2 + (1/4) * |x|^4 |x| < 1/2 From 554fcc2de265acc30d00c2674901f198f62dda20 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Mon, 22 Sep 2025 17:19:37 -0500 Subject: [PATCH 079/154] bugfix in 4th order shape function --- src/kernels/particle_shapes.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 2615b4310..5eada8614 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -420,7 +420,7 @@ namespace prtl_shape { #pragma unroll for (int n = 0; n < 5; n++) { - S[i] = S4(Kokkos::fabs(THREE_HALFS + di - static_cast(n))); + S[n] = S4(Kokkos::fabs(THREE_HALFS + di - static_cast(n))); } } // staggered } else if constexpr (O == 5u) { From 8faa0ee679894e4cb9e2f525cfd81e31f144cf75 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Mon, 22 Sep 2025 18:30:15 -0500 Subject: [PATCH 080/154] bugfix in 3D indexing --- src/kernels/currents_deposit.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 1299be69d..f44b54ce4 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -606,13 +606,13 @@ namespace kernel { jx1[0][j][k] = -Qdxdt * 
Wx1[0][j][k]; } } - + #pragma unroll for (int i = 1; i < O + 2; ++i) { #pragma unroll for (int j = 0; j < O + 2; ++j) { #pragma unroll - for (int k = 0; j < O + 2; ++k) { + for (int k = 0; k < O + 2; ++k) { jx1[i][j][k] = jx1[i - 1][j][k] - Qdxdt * Wx1[i][j][k]; } } @@ -699,8 +699,8 @@ namespace kernel { } } } - } + } // dim } else { // order raise::KernelError(HERE, "Unsupported interpolation order. O > 9 not supported. Seriously. What are you even doing here?"); } From 081e76c63e152015772e273bb53be529d120714f Mon Sep 17 00:00:00 2001 From: haykh Date: Mon, 6 Oct 2025 15:46:08 -0400 Subject: [PATCH 081/154] delta/beta added underscore to naming + tests --- input.example.toml | 59 +++++++++++++++++++++------- src/engines/srpic.hpp | 20 +++++----- src/framework/parameters.cpp | 62 +++++++++++++++--------------- src/framework/tests/parameters.cpp | 48 +++++++++++++++++++++++ src/global/defaults.h | 18 ++++----- 5 files changed, 144 insertions(+), 63 deletions(-) diff --git a/input.example.toml b/input.example.toml index eb4445bba..3a458f8b3 100644 --- a/input.example.toml +++ b/input.example.toml @@ -257,20 +257,53 @@ # @note: [required] if one of the species has `cooling = "synchrotron"` gamma_rad = "" + # Stencil coefficients for the field solver [notation as in Blinne+ (2018)] + # @note: Standard Yee solver: `delta_i = beta_ij = 0.0` [algorithms.fieldsolver] - # Yee - all 0.0 - default - # 1D - deltax = -0.065 - # 2D - deltay = -0.065 - betaxy = -0.065 - betayx = -0.065 - # 3D - not yet tested - deltaz = 0.0 - betaxz = 0.0 - betazx = 0.0 - betayz = 0.0 - betazy = 0.0 + # delta_x coefficient (for `F_{i +/- 3/2, j, k}`) + # @type: float + # @default: 0.0 + delta_x = "" + # delta_y coefficient (for `F_{i, j +/- 3/2, k}`) + # @type: float + # @default: 0.0 + # @note: Used only for 2D and 3D + delta_y = "" + # delta_z coefficient (for `F_{i, j, k +/- 3/2}`) + # @type: float + # @default: 0.0 + # @note: Used only for 3D + delta_z = "" + # beta_xy coefficient (for `F_{i 
+/- 1/2, j +/- 1, k}`) + # @type: float + # @default: 0.0 + # @note: Used only for 2D and 3D + beta_xy = "" + # beta_yx coefficient (for `F_{i +/- 1, j +/- 1/2, k}`) + # @type: float + # @default: 0.0 + # @note: Used only for 2D and 3D + beta_yx = "" + # beta_xz coefficient (for `F_{i +/- 1/2, j, k +/- 1}`) + # @type: float + # @default: 0.0 + # @note: Used only for 3D + beta_xz = "" + # beta_zx coefficient (for `F_{i +/- 1, j, k +/- 1/2}`) + # @type: float + # @default: 0.0 + # @note: Used only for 3D + beta_zx = "" + # beta_yz coefficient (for `F_{i, j +/- 1/2, k +/- 1}`) + # @type: float + # @default: 0.0 + # @note: Used only for 3D + beta_yz = "" + # beta_zy coefficient (for `F_{i, j +/- 1, k +/- 1/2}`) + # @type: float + # @default: 0.0 + # @note: Used only for 3D + beta_zy = "" [particles] # Fiducial number of particles per cell diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index 3418da9e5..a64c44f22 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -186,24 +186,24 @@ namespace ntt { // minkowski case const auto dx = math::sqrt(domain.mesh.metric.template h_<1, 1>({})); const auto deltax = m_params.template get( - "algorithms.fieldsolver.deltax"); + "algorithms.fieldsolver.delta_x"); const auto deltay = m_params.template get( - "algorithms.fieldsolver.deltay"); + "algorithms.fieldsolver.delta_y"); const auto betaxy = m_params.template get( - "algorithms.fieldsolver.betaxy"); + "algorithms.fieldsolver.beta_xy"); const auto betayx = m_params.template get( - "algorithms.fieldsolver.betayx"); + "algorithms.fieldsolver.beta_yx"); const auto deltaz = m_params.template get( - "algorithms.fieldsolver.deltaz"); + "algorithms.fieldsolver.delta_z"); const auto betaxz = m_params.template get( - "algorithms.fieldsolver.betaxz"); + "algorithms.fieldsolver.beta_xz"); const auto betazx = m_params.template get( - "algorithms.fieldsolver.betazx"); + "algorithms.fieldsolver.beta_zx"); const auto betayz = m_params.template get( - 
"algorithms.fieldsolver.betayz"); + "algorithms.fieldsolver.beta_yz"); const auto betazy = m_params.template get( - "algorithms.fieldsolver.betazy"); - real_t coeff1, coeff2; + "algorithms.fieldsolver.beta_zy"); + real_t coeff1, coeff2; if constexpr (M::Dim == Dim::_2D) { coeff1 = dT / SQR(dx); coeff2 = dT; diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index 702f117b3..0987934e3 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -31,10 +31,10 @@ namespace ntt { template - auto get_dx0_V0(const std::vector& resolution, - const boundaries_t& extent, - const std::map& params) - -> std::pair { + auto get_dx0_V0( + const std::vector& resolution, + const boundaries_t& extent, + const std::map& params) -> std::pair { const auto metric = M(resolution, extent, params); const auto dx0 = metric.dxMin(); coord_t x_corner { ZERO }; @@ -416,60 +416,60 @@ namespace ntt { toml::find_or(toml_data, "algorithms", "toggles", "deposit", true)); /* [algorithms.fieldsolver] --------------------------------------------- */ - set("algorithms.fieldsolver.deltax", + set("algorithms.fieldsolver.delta_x", toml::find_or(toml_data, "algorithms", "fieldsolver", - "deltax", - defaults::fieldsolver::deltax)); - set("algorithms.fieldsolver.deltay", + "delta_x", + defaults::fieldsolver::delta_x)); + set("algorithms.fieldsolver.delta_y", toml::find_or(toml_data, "algorithms", "fieldsolver", - "deltay", - defaults::fieldsolver::deltay)); - set("algorithms.fieldsolver.deltaz", + "delta_y", + defaults::fieldsolver::delta_y)); + set("algorithms.fieldsolver.delta_z", toml::find_or(toml_data, "algorithms", "fieldsolver", - "deltaz", - defaults::fieldsolver::deltaz)); - set("algorithms.fieldsolver.betaxy", + "delta_z", + defaults::fieldsolver::delta_z)); + set("algorithms.fieldsolver.beta_xy", toml::find_or(toml_data, "algorithms", "fieldsolver", - "betaxy", - defaults::fieldsolver::betaxy)); - set("algorithms.fieldsolver.betayx", + "beta_xy", + 
defaults::fieldsolver::beta_xy)); + set("algorithms.fieldsolver.beta_yx", toml::find_or(toml_data, "algorithms", "fieldsolver", - "betayx", - defaults::fieldsolver::betayx)); - set("algorithms.fieldsolver.betaxz", + "beta_yx", + defaults::fieldsolver::beta_yx)); + set("algorithms.fieldsolver.beta_xz", toml::find_or(toml_data, "algorithms", "fieldsolver", - "betaxz", - defaults::fieldsolver::betaxz)); - set("algorithms.fieldsolver.betazx", + "beta_xz", + defaults::fieldsolver::beta_xz)); + set("algorithms.fieldsolver.beta_zx", toml::find_or(toml_data, "algorithms", "fieldsolver", - "betazx", - defaults::fieldsolver::betazx)); - set("algorithms.fieldsolver.betayz", + "beta_zx", + defaults::fieldsolver::beta_zx)); + set("algorithms.fieldsolver.beta_yz", toml::find_or(toml_data, "algorithms", "fieldsolver", - "betayz", - defaults::fieldsolver::betayz)); - set("algorithms.fieldsolver.betazy", + "beta_yz", + defaults::fieldsolver::beta_yz)); + set("algorithms.fieldsolver.beta_zy", toml::find_or(toml_data, "algorithms", "fieldsolver", - "betazy", - defaults::fieldsolver::betazy)); + "beta_zy", + defaults::fieldsolver::beta_zy)); /* [algorithms.timestep] ------------------------------------------------ */ set("algorithms.timestep.CFL", toml::find_or(toml_data, "algorithms", "timestep", "CFL", defaults::cfl)); diff --git a/src/framework/tests/parameters.cpp b/src/framework/tests/parameters.cpp index 07b2c11b3..40ca26d1e 100644 --- a/src/framework/tests/parameters.cpp +++ b/src/framework/tests/parameters.cpp @@ -45,6 +45,17 @@ const auto mink_1d = u8R"( [algorithms.timestep] CFL = 0.45 + [algorithms.fieldsolver] + delta_x = 1.0 + delta_y = 2.0 + delta_z = 3.0 + beta_xy = 4.0 + beta_yx = 5.0 + beta_xz = 6.0 + beta_zx = 7.0 + beta_yz = 8.0 + beta_zy = 9.0 + [particles] ppc0 = 10.0 clear_interval = 100 @@ -331,6 +342,43 @@ auto main(int argc, char* argv[]) -> int { 1, "output.fields.downsampling.size()"); assert_equal(output_stride[0], 4, "output.fields.downsampling[0]"); + + 
assert_equal( + params_mink_1d.get("algorithms.fieldsolver.delta_x"), + (real_t)(1.0), + "algorithms.fieldsolver.delta_x"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.delta_y"), + (real_t)(2.0), + "algorithms.fieldsolver.delta_y"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.delta_z"), + (real_t)(3.0), + "algorithms.fieldsolver.delta_z"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.beta_xy"), + (real_t)(4.0), + "algorithms.fieldsolver.beta_xy"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.beta_yx"), + (real_t)(5.0), + "algorithms.fieldsolver.beta_yx"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.beta_xz"), + (real_t)(6.0), + "algorithms.fieldsolver.beta_xz"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.beta_zx"), + (real_t)(7.0), + "algorithms.fieldsolver.beta_zx"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.beta_yz"), + (real_t)(8.0), + "algorithms.fieldsolver.beta_yz"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.beta_zy"), + (real_t)(9.0), + "algorithms.fieldsolver.beta_zy"); } { diff --git a/src/global/defaults.h b/src/global/defaults.h index 47efcb1d2..9192ca8a4 100644 --- a/src/global/defaults.h +++ b/src/global/defaults.h @@ -27,17 +27,17 @@ namespace ntt::defaults { const timestep_t clear_interval = 100; namespace fieldsolver { - const real_t deltax = 0.0; + const real_t delta_x = 0.0; - const real_t deltay = 0.0; - const real_t betaxy = 0.0; - const real_t betayx = 0.0; + const real_t delta_y = 0.0; + const real_t beta_xy = 0.0; + const real_t beta_yx = 0.0; - const real_t deltaz = 0.0; - const real_t betaxz = 0.0; - const real_t betazx = 0.0; - const real_t betayz = 0.0; - const real_t betazy = 0.0; + const real_t delta_z = 0.0; + const real_t beta_xz = 0.0; + const real_t beta_zx = 0.0; + const real_t beta_yz = 0.0; + const real_t beta_zy = 0.0; } // namespace fieldsolver namespace qsph { From 
675e1786e5f6aed74d2285d68d86148e695ce0ea Mon Sep 17 00:00:00 2001 From: haykh Date: Mon, 6 Oct 2025 15:46:48 -0400 Subject: [PATCH 082/154] disabled failing test for stats (intended) + faraday mink kernel default values --- src/kernels/faraday_mink.hpp | 21 ++++++++++++++------- src/kernels/tests/particle_moments.cpp | 18 ------------------ 2 files changed, 14 insertions(+), 25 deletions(-) diff --git a/src/kernels/faraday_mink.hpp b/src/kernels/faraday_mink.hpp index 90ea7e34f..35096e0f1 100644 --- a/src/kernels/faraday_mink.hpp +++ b/src/kernels/faraday_mink.hpp @@ -14,6 +14,7 @@ #include "arch/kokkos_aliases.h" #include "utils/error.h" +#include "utils/numeric.h" namespace kernel::mink { using namespace ntt; @@ -42,13 +43,21 @@ namespace kernel::mink { * ! 2D: coeff1 = dt / dx^2, coeff2 = dt * ! 3D: coeff1 = dt / dx */ - Faraday_kernel(const ndfield_t& EB, real_t coeff1, real_t coeff2 - , real_t deltax, real_t deltay, real_t betaxy, real_t betayx - , real_t deltaz, real_t betaxz, real_t betazx, real_t betayz - , real_t betazy) + Faraday_kernel(const ndfield_t& EB, + real_t coeff1, + real_t coeff2, + real_t deltax = ZERO, + real_t deltay = ZERO, + real_t betaxy = ZERO, + real_t betayx = ZERO, + real_t deltaz = ZERO, + real_t betaxz = ZERO, + real_t betazx = ZERO, + real_t betayz = ZERO, + real_t betazy = ZERO) : EB { EB } , coeff1 { coeff1 } - , coeff2 { coeff2 } + , coeff2 { coeff2 } , deltax { deltax } , deltay { deltay } , betaxy { betaxy } @@ -59,8 +68,6 @@ namespace kernel::mink { , betayz { betayz } , betazy { betazy } {} - - Inline void operator()(index_t i1) const { if constexpr (D == Dim::_1D) { const auto alphax = ONE - THREE * deltax; diff --git a/src/kernels/tests/particle_moments.cpp b/src/kernels/tests/particle_moments.cpp index ca3c2a7a0..be86b514a 100644 --- a/src/kernels/tests/particle_moments.cpp +++ b/src/kernels/tests/particle_moments.cpp @@ -291,24 +291,6 @@ auto main(int argc, char* argv[]) -> int { {}, 10); - testParticleMoments>( - { - 10, 
- 10 - }, - { { 1.0, 2.0 } }, - {}, - 10); - - testParticleMoments>( - { - 10, - 10 - }, - { { 1.0, 10.0 } }, - { { "r0", 0.0 }, { "h", 0.25 } }, - 10); - } catch (std::exception& e) { std::cerr << e.what() << std::endl; Kokkos::finalize(); From 49da4072106ff6b608af2d9e59f108b785fac8bf Mon Sep 17 00:00:00 2001 From: haykh Date: Mon, 6 Oct 2025 15:47:03 -0400 Subject: [PATCH 083/154] consistent single/double prec in qkerrschild --- src/kernels/currents_deposit.hpp | 9 +++++---- src/kernels/reduced_stats.hpp | 3 ++- src/metrics/qkerr_schild.h | 16 ++++++++++------ 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index bd84554f8..a9c8e26ed 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -129,11 +129,12 @@ namespace kernel { } else { coord_t xp_ { ZERO }; xp_[0] = xp[0]; - real_t theta_Cd { xp[1] }; - const real_t theta_Ph { metric.template convert<2, Crd::Cd, Crd::Ph>( + real_t theta_Cd { xp[1] }; + const auto theta_Ph { metric.template convert<2, Crd::Cd, Crd::Ph>( theta_Cd) }; - const real_t small_angle { static_cast(constant::SMALL_ANGLE_GR) }; - const auto large_angle { static_cast(constant::PI) - small_angle }; + const auto small_angle { static_cast(constant::SMALL_ANGLE_GR) }; + const auto large_angle { static_cast( + constant::PI - constant::SMALL_ANGLE_GR) }; if (theta_Ph < small_angle) { theta_Cd = metric.template convert<2, Crd::Ph, Crd::Cd>(small_angle); } else if (theta_Ph >= large_angle) { diff --git a/src/kernels/reduced_stats.hpp b/src/kernels/reduced_stats.hpp index fe7f36c5a..bdc7633d6 100644 --- a/src/kernels/reduced_stats.hpp +++ b/src/kernels/reduced_stats.hpp @@ -2,7 +2,8 @@ * @file kernels/reduced_stats.hpp * @brief Compute reduced field/moment quantities for stats output * @implements - * - kernel::PrtlToPhys_kernel<> + * - kernel::ReducedFields_kernel<> + * - kernel::ReducedParticleMoments_kernel<> * @namespaces: * - 
kernel:: */ diff --git a/src/metrics/qkerr_schild.h b/src/metrics/qkerr_schild.h index c137b5bcf..f27a08790 100644 --- a/src/metrics/qkerr_schild.h +++ b/src/metrics/qkerr_schild.h @@ -659,8 +659,9 @@ namespace metric { return ONE; } else { return (ONE + TWO * h0 + - static_cast(12.0) * h0 * (eta * constant::INV_PI) * - ((eta * constant::INV_PI) - ONE)); + static_cast(12.0) * h0 * + (eta * static_cast(constant::INV_PI)) * + ((eta * static_cast(constant::INV_PI)) - ONE)); } } @@ -671,8 +672,10 @@ namespace metric { if (cmp::AlmostZero(h0)) { return eta; } else { - return eta + TWO * h0 * eta * (constant::PI - TWO * eta) * - (constant::PI - eta) * constant::INV_PI_SQR; + return eta + TWO * h0 * eta * + (static_cast(constant::PI) - TWO * eta) * + (static_cast(constant::PI) - eta) * + static_cast(constant::INV_PI_SQR); } } @@ -684,9 +687,10 @@ namespace metric { return deta; } else { return deta * - (ONE + TWO * h0 * constant::INV_PI_SQR * + (ONE + TWO * h0 * static_cast(constant::INV_PI_SQR) * (TWO * THREE * SQR(eta) - - TWO * THREE * constant::PI * eta + constant::PI_SQR)); + TWO * THREE * static_cast(constant::PI) * eta + + static_cast(constant::PI_SQR))); } } From f4faa870b1332df0c2c92c1c510d2918e04fa2d6 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 8 Oct 2025 15:12:55 -0500 Subject: [PATCH 084/154] 10th order shape function --- src/kernels/currents_deposit.hpp | 4 +- src/kernels/particle_shapes.hpp | 116 ++++++++++++++++++++++++++++++- 2 files changed, 117 insertions(+), 3 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 5fae87671..f6e8579cf 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -402,7 +402,7 @@ namespace kernel { cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; } } - } else if constexpr ((O >= 1u) and (O <= 9u)) { + } else if constexpr ((O >= 1u) and (O <= 10u)) { // shape function in dim1 -> always required real_t iS_x1[O + 2], fS_x1[O + 2]; @@ -703,7 +703,7 @@ 
namespace kernel { } // dim } else { // order - raise::KernelError(HERE, "Unsupported interpolation order. O > 9 not supported. Seriously. What are you even doing here?"); + raise::KernelError(HERE, "Unsupported interpolation order. O > 10 not supported. Seriously. What are you even doing here?"); } } }; diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 5eada8614..8814a33b0 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -320,6 +320,79 @@ namespace prtl_shape { } } + inline real_t S10(const real_t x) { + if (x < HALF) { + return static_cast(381773117.0 / 928972800.0) + - static_cast(156409.0 / 737280.0) * SQR(x) + + static_cast(14597.0 / 276480.0) * SQR(SQR(x)) + - static_cast(583.0 / 69120.0) * SQR(CUBE(x)) + + static_cast(11.0 / 11520.0) * SQR(SQR(SQR(x))) + - static_cast(1.0 / 14400.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < THREE_HALFS) { + return static_cast(152709293.0 / 371589120.0) + - static_cast(11.0 / 4423680.0) * x + - static_cast(62557.0 / 294912.0) * SQR(x) + - static_cast(11.0 / 92160.0) * CUBE(x) + + static_cast(5885.0 / 110592.0) * SQR(SQR(x)) + - static_cast(77.0 / 76800.0) * CUBE(x) * SQR(x) + - static_cast(187.0 / 27648.0) * SQR(CUBE(x)) + - static_cast(11.0 / 5760.0) * SQR(CUBE(x)) * x + + static_cast(11.0 / 4608.0) * SQR(SQR(SQR(x))) + - static_cast(11.0 / 17280.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 17280.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x <= static_cast(2.5)) { + return static_cast(37690169.0 / 92897280.0) + + static_cast(135311.0 / 3870720.0) * x + - static_cast(163603.0 / 516096.0) * SQR(x) + + static_cast(7513.0 / 40320.0) * CUBE(x) + - static_cast(4543.0 / 27648.0) * SQR(SQR(x)) + + static_cast(1661.0 / 9600.0) * CUBE(x) * SQR(x) + - static_cast(715.0 / 6912.0) * SQR(CUBE(x)) + + static_cast(11.0 / 315.0) * SQR(CUBE(x)) * x + - static_cast(55.0 / 8064.0) * SQR(SQR(SQR(x))) + + static_cast(11.0 / 15120.0) * SQR(SQR(SQR(x))) * x + - 
static_cast(1.0 / 30240.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < static_cast(3.5)) { + return static_cast(623786977.0 / 743178240.0) + - static_cast(11695211.0 / 6881280.0) * x + + static_cast(1654543.0 / 589824.0) * SQR(x) + - static_cast(1352153.0 / 430080.0) * CUBE(x) + + static_cast(479281.0 / 221184.0) * SQR(SQR(x)) + - static_cast(48433.0 / 51200.0) * CUBE(x) * SQR(x) + + static_cast(14905.0 / 55296.0) * SQR(CUBE(x)) + - static_cast(451.0 / 8960.0) * SQR(CUBE(x)) * x + + static_cast(55.0 / 9216.0) * SQR(SQR(SQR(x))) + - static_cast(11.0 / 26880.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 80640.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < static_cast(4.5)) { + return static_cast(-1241720381.0 / 371589120.0) + + static_cast(237959711.0 / 23224320.0) * x + - static_cast(3702215.0 / 294912.0) * SQR(x) + + static_cast(2070343.0 / 241920.0) * CUBE(x) + - static_cast(407429.0 / 110592.0) * SQR(SQR(x)) + + static_cast(61061.0 / 57600.0) * CUBE(x) * SQR(x) + - static_cast(5753.0 / 27648.0) * SQR(CUBE(x)) + + static_cast(209.0 / 7560.0) * SQR(CUBE(x)) * x + - static_cast(11.0 / 4608.0) * SQR(SQR(SQR(x))) + + static_cast(11.0 / 90720.0) * SQR(SQR(SQR(x))) * x + - static_cast(1.0 / 362880.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < static_cast(5.5)) { + return static_cast(25937424601.0 / 3715891200.0) + - static_cast(2357947691.0 / 185794560.0) * x + + static_cast(214358881.0 / 20643840.0) * SQR(x) + - static_cast(19487171.0 / 3870720.0) * CUBE(x) + + static_cast(1771561.0 / 1105920.0) * SQR(SQR(x)) + - static_cast(161051.0 / 460800.0) * CUBE(x) * SQR(x) + + static_cast(14641.0 / 276480.0) * SQR(CUBE(x)) + - static_cast(1331.0 / 241920.0) * SQR(CUBE(x)) * x + + static_cast(121.0 / 322560.0) * SQR(SQR(SQR(x))) + - static_cast(11.0 / 725760.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 3628800.0) * SQR(SQR(SQR(x))) * SQR(x); + } else { + return ZERO; + } +} + template Inline void order(const int& i, const real_t& di, int& i_min, real_t S[O + 1]) { if 
constexpr (O == 1u) { @@ -609,8 +682,49 @@ namespace prtl_shape { } } } // staggered + } else if constexpr (O == 10u) { + // clang-format off + // S10(x) = + // 381773117/928972800 - (156409/737280) * |x|^2 + (14597/276480) * |x|^4 - (583/69120) * |x|^6 + (11/11520) * |x|^8 - (1/14400) * |x|^10 if |x| ≤ 0.5 + // 152709293/371589120 - (11/4423680) * |x| - (62557/294912) * |x|^2 - (11/92160) * |x|^3 + (5885/110592) * |x|^4 - (77/76800) * |x|^5 - + // (187/27648) * |x|^6 - (11/5760) * |x|^7 + (11/4608) * |x|^8 - (11/17280) * |x|^9 + (1/17280) * |x|^10 if 0.5 < |x| ≤ 1.5 + // 37690169/92897280 + (135311/3870720) * |x| - (163603/516096) * |x|^2 + (7513/40320) * |x|^3 - (4543/27648) * |x|^4 + // + (1661/9600) * |x|^5 - (715/6912) * |x|^6 + (11/315) * |x|^7 - (55/8064) * |x|^8 + (11/15120) * |x|^9 - (1/30240) * |x|^10 if 1.5 < |x| ≤ 2.5 + // 623786977/743178240 - (11695211/6881280) * |x| + (1654543/589824) * |x|^2 - (1352153/430080) * |x|^3 + (479281/221184) * |x|^4 + // - (48433/51200) * |x|^5 + (14905/55296) * |x|^6 - (451/8960) * |x|^7 + (55/9216) * |x|^8 - (11/26880) * |x|^9 + (1/80640) * |x|^10 if 2.5 < |x| ≤ 3.5 + // -1241720381/371589120 + (237959711/23224320) * |x| - (3702215/294912) * |x|^2 + (2070343/241920) * |x|^3 - (407429/110592) * |x|^4 + // + (61061/57600) * |x|^5 - (5753/27648) * |x|^6 + (209/7560) * |x|^7 - (11/4608) * |x|^8 + (11/90720) * |x|^9 - (1/362880) * |x|^10 if 3.5 < |x| ≤ 4.5 + // 25937424601/3715891200 - (2357947691/185794560) * |x| + (214358881/20643840) * |x|^2 - (19487171/3870720) * |x|^3 + (1771561/1105920) * |x|^4 + // - (161051/460800) * |x|^5 + (14641/276480) * |x|^6 - (1331/241920) * |x|^7 + (121/322560) * |x|^8 - (11/725760) * |x|^9 + (1/3628800) * |x|^10 if 4.5 < |x| ≤ 5.5 + // 0.0 otherwise + // clang-format on + if constexpr (not STAGGERED) { // compute at i positions + if (di < HALF) { + i_min = i - 5; + +#pragma unroll + for (int n = 0; n < 10; n++) { + S[n] = S10(Kokkos::fabs(FIVE + di - static_cast(n))); + } + } else { + i_min 
= i - 4; + +#pragma unroll + for (int n = 0; n < 10; n++) { + S[n] = S10(Kokkos::fabs(FOUR + di - static_cast(n))); + } + } + } else { // compute at i + 1/2 positions + i_min = i - 5; + +#pragma unroll + for (int n = 0; n < 10; n++) { + S[n] = S10(Kokkos::fabs(static_cast(4.5) + + di - static_cast(n))); + } + } // staggered } else { - raise::KernelError(HERE, "Unsupported interpolation order. O > 9 not supported. Seriously. What are you even doing here?"); + raise::KernelError(HERE, "Unsupported interpolation order. O > 10 not supported. Seriously. What are you even doing here?"); } } From 2a23da1ddf243caa32c039b55d9aab5365d6e382 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 9 Oct 2025 11:53:23 -0500 Subject: [PATCH 085/154] updated cmake options for 10th order --- cmake/config.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index 8324957b8..43899ee40 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -19,8 +19,8 @@ endfunction() # ------------------------------- Shape function --------------------------- # function(set_shape_order shape_order) if(${deposit} STREQUAL "esirkepov") - if(${shape_order} GREATER 9) - message(FATAL_ERROR "Shape order must be between 1 and 9") + if(${shape_order} GREATER 10) + message(FATAL_ERROR "Shape order must be between 1 and 10") endif() add_compile_options("-DSHAPE_ORDER=${shape_order}") endif() From 0e9320367259f614abdc79e6a4b6d120d29cbee8 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 9 Oct 2025 12:35:49 -0500 Subject: [PATCH 086/154] entity now goes to 11! 
--- CMakeLists.txt | 2 +- cmake/config.cmake | 4 +- src/kernels/currents_deposit.hpp | 5 +- src/kernels/particle_shapes.hpp | 292 ++++++++++++++++++++++--------- 4 files changed, 219 insertions(+), 84 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 76a52f29d..48e5689b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,7 +95,7 @@ if(${deposit} STREQUAL "zigzag") endif() set(shape_orders - "1" "2" "3" "4" "5" "6" "7" "8" "9" + "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" CACHE STRING "Shape orders") include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.cmake) diff --git a/cmake/config.cmake b/cmake/config.cmake index 43899ee40..e9b0de390 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -19,8 +19,8 @@ endfunction() # ------------------------------- Shape function --------------------------- # function(set_shape_order shape_order) if(${deposit} STREQUAL "esirkepov") - if(${shape_order} GREATER 10) - message(FATAL_ERROR "Shape order must be between 1 and 10") + if(${shape_order} GREATER 11) + message(FATAL_ERROR "Shape order must be between 1 and 11.") endif() add_compile_options("-DSHAPE_ORDER=${shape_order}") endif() diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index f6e8579cf..18955e795 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -703,7 +703,10 @@ namespace kernel { } // dim } else { // order - raise::KernelError(HERE, "Unsupported interpolation order. O > 10 not supported. Seriously. What are you even doing here?"); + raise::KernelError( + HERE, + "Unsupported interpolation order. O > 11 not supported. Seriously. " + "What are you even doing here? 
Entity already goes to 11!"); } } }; diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index 8814a33b0..c5d5748de 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -321,77 +321,156 @@ namespace prtl_shape { } inline real_t S10(const real_t x) { - if (x < HALF) { - return static_cast(381773117.0 / 928972800.0) - - static_cast(156409.0 / 737280.0) * SQR(x) - + static_cast(14597.0 / 276480.0) * SQR(SQR(x)) - - static_cast(583.0 / 69120.0) * SQR(CUBE(x)) - + static_cast(11.0 / 11520.0) * SQR(SQR(SQR(x))) - - static_cast(1.0 / 14400.0) * SQR(SQR(SQR(x))) * SQR(x); - } else if (x < THREE_HALFS) { - return static_cast(152709293.0 / 371589120.0) - - static_cast(11.0 / 4423680.0) * x - - static_cast(62557.0 / 294912.0) * SQR(x) - - static_cast(11.0 / 92160.0) * CUBE(x) - + static_cast(5885.0 / 110592.0) * SQR(SQR(x)) - - static_cast(77.0 / 76800.0) * CUBE(x) * SQR(x) - - static_cast(187.0 / 27648.0) * SQR(CUBE(x)) - - static_cast(11.0 / 5760.0) * SQR(CUBE(x)) * x - + static_cast(11.0 / 4608.0) * SQR(SQR(SQR(x))) - - static_cast(11.0 / 17280.0) * SQR(SQR(SQR(x))) * x - + static_cast(1.0 / 17280.0) * SQR(SQR(SQR(x))) * SQR(x); - } else if (x <= static_cast(2.5)) { - return static_cast(37690169.0 / 92897280.0) - + static_cast(135311.0 / 3870720.0) * x - - static_cast(163603.0 / 516096.0) * SQR(x) - + static_cast(7513.0 / 40320.0) * CUBE(x) - - static_cast(4543.0 / 27648.0) * SQR(SQR(x)) - + static_cast(1661.0 / 9600.0) * CUBE(x) * SQR(x) - - static_cast(715.0 / 6912.0) * SQR(CUBE(x)) - + static_cast(11.0 / 315.0) * SQR(CUBE(x)) * x - - static_cast(55.0 / 8064.0) * SQR(SQR(SQR(x))) - + static_cast(11.0 / 15120.0) * SQR(SQR(SQR(x))) * x - - static_cast(1.0 / 30240.0) * SQR(SQR(SQR(x))) * SQR(x); - } else if (x < static_cast(3.5)) { - return static_cast(623786977.0 / 743178240.0) - - static_cast(11695211.0 / 6881280.0) * x - + static_cast(1654543.0 / 589824.0) * SQR(x) - - static_cast(1352153.0 / 430080.0) * CUBE(x) 
- + static_cast(479281.0 / 221184.0) * SQR(SQR(x)) - - static_cast(48433.0 / 51200.0) * CUBE(x) * SQR(x) - + static_cast(14905.0 / 55296.0) * SQR(CUBE(x)) - - static_cast(451.0 / 8960.0) * SQR(CUBE(x)) * x - + static_cast(55.0 / 9216.0) * SQR(SQR(SQR(x))) - - static_cast(11.0 / 26880.0) * SQR(SQR(SQR(x))) * x - + static_cast(1.0 / 80640.0) * SQR(SQR(SQR(x))) * SQR(x); - } else if (x < static_cast(4.5)) { - return static_cast(-1241720381.0 / 371589120.0) - + static_cast(237959711.0 / 23224320.0) * x - - static_cast(3702215.0 / 294912.0) * SQR(x) - + static_cast(2070343.0 / 241920.0) * CUBE(x) - - static_cast(407429.0 / 110592.0) * SQR(SQR(x)) - + static_cast(61061.0 / 57600.0) * CUBE(x) * SQR(x) - - static_cast(5753.0 / 27648.0) * SQR(CUBE(x)) - + static_cast(209.0 / 7560.0) * SQR(CUBE(x)) * x - - static_cast(11.0 / 4608.0) * SQR(SQR(SQR(x))) - + static_cast(11.0 / 90720.0) * SQR(SQR(SQR(x))) * x - - static_cast(1.0 / 362880.0) * SQR(SQR(SQR(x))) * SQR(x); - } else if (x < static_cast(5.5)) { - return static_cast(25937424601.0 / 3715891200.0) - - static_cast(2357947691.0 / 185794560.0) * x - + static_cast(214358881.0 / 20643840.0) * SQR(x) - - static_cast(19487171.0 / 3870720.0) * CUBE(x) - + static_cast(1771561.0 / 1105920.0) * SQR(SQR(x)) - - static_cast(161051.0 / 460800.0) * CUBE(x) * SQR(x) - + static_cast(14641.0 / 276480.0) * SQR(CUBE(x)) - - static_cast(1331.0 / 241920.0) * SQR(CUBE(x)) * x - + static_cast(121.0 / 322560.0) * SQR(SQR(SQR(x))) - - static_cast(11.0 / 725760.0) * SQR(SQR(SQR(x))) * x - + static_cast(1.0 / 3628800.0) * SQR(SQR(SQR(x))) * SQR(x); - } else { - return ZERO; + if (x < HALF) { + return static_cast(381773117.0 / 928972800.0) - + static_cast(156409.0 / 737280.0) * SQR(x) + + static_cast(14597.0 / 276480.0) * SQR(SQR(x)) - + static_cast(583.0 / 69120.0) * SQR(CUBE(x)) + + static_cast(11.0 / 11520.0) * SQR(SQR(SQR(x))) - + static_cast(1.0 / 14400.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < THREE_HALFS) { + return 
static_cast(152709293.0 / 371589120.0) - + static_cast(11.0 / 4423680.0) * x - + static_cast(62557.0 / 294912.0) * SQR(x) - + static_cast(11.0 / 92160.0) * CUBE(x) + + static_cast(5885.0 / 110592.0) * SQR(SQR(x)) - + static_cast(77.0 / 76800.0) * CUBE(x) * SQR(x) - + static_cast(187.0 / 27648.0) * SQR(CUBE(x)) - + static_cast(11.0 / 5760.0) * SQR(CUBE(x)) * x + + static_cast(11.0 / 4608.0) * SQR(SQR(SQR(x))) - + static_cast(11.0 / 17280.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 17280.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x <= static_cast(2.5)) { + return static_cast(37690169.0 / 92897280.0) + + static_cast(135311.0 / 3870720.0) * x - + static_cast(163603.0 / 516096.0) * SQR(x) + + static_cast(7513.0 / 40320.0) * CUBE(x) - + static_cast(4543.0 / 27648.0) * SQR(SQR(x)) + + static_cast(1661.0 / 9600.0) * CUBE(x) * SQR(x) - + static_cast(715.0 / 6912.0) * SQR(CUBE(x)) + + static_cast(11.0 / 315.0) * SQR(CUBE(x)) * x - + static_cast(55.0 / 8064.0) * SQR(SQR(SQR(x))) + + static_cast(11.0 / 15120.0) * SQR(SQR(SQR(x))) * x - + static_cast(1.0 / 30240.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < static_cast(3.5)) { + return static_cast(623786977.0 / 743178240.0) - + static_cast(11695211.0 / 6881280.0) * x + + static_cast(1654543.0 / 589824.0) * SQR(x) - + static_cast(1352153.0 / 430080.0) * CUBE(x) + + static_cast(479281.0 / 221184.0) * SQR(SQR(x)) - + static_cast(48433.0 / 51200.0) * CUBE(x) * SQR(x) + + static_cast(14905.0 / 55296.0) * SQR(CUBE(x)) - + static_cast(451.0 / 8960.0) * SQR(CUBE(x)) * x + + static_cast(55.0 / 9216.0) * SQR(SQR(SQR(x))) - + static_cast(11.0 / 26880.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 80640.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < static_cast(4.5)) { + return static_cast(-1241720381.0 / 371589120.0) + + static_cast(237959711.0 / 23224320.0) * x - + static_cast(3702215.0 / 294912.0) * SQR(x) + + static_cast(2070343.0 / 241920.0) * CUBE(x) - + static_cast(407429.0 / 110592.0) * SQR(SQR(x)) + + 
static_cast(61061.0 / 57600.0) * CUBE(x) * SQR(x) - + static_cast(5753.0 / 27648.0) * SQR(CUBE(x)) + + static_cast(209.0 / 7560.0) * SQR(CUBE(x)) * x - + static_cast(11.0 / 4608.0) * SQR(SQR(SQR(x))) + + static_cast(11.0 / 90720.0) * SQR(SQR(SQR(x))) * x - + static_cast(1.0 / 362880.0) * SQR(SQR(SQR(x))) * SQR(x); + } else if (x < static_cast(5.5)) { + return static_cast(25937424601.0 / 3715891200.0) - + static_cast(2357947691.0 / 185794560.0) * x + + static_cast(214358881.0 / 20643840.0) * SQR(x) - + static_cast(19487171.0 / 3870720.0) * CUBE(x) + + static_cast(1771561.0 / 1105920.0) * SQR(SQR(x)) - + static_cast(161051.0 / 460800.0) * CUBE(x) * SQR(x) + + static_cast(14641.0 / 276480.0) * SQR(CUBE(x)) - + static_cast(1331.0 / 241920.0) * SQR(CUBE(x)) * x + + static_cast(121.0 / 322560.0) * SQR(SQR(SQR(x))) - + static_cast(11.0 / 725760.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 3628800.0) * SQR(SQR(SQR(x))) * SQR(x); + } else { + return ZERO; + } + } + + inline real_t S11(const real_t x) { + if (x < ONE) { + return static_cast(655177.0 / 1663200.0) - + static_cast(809.0 / 4320.0) * SQR(x) + + static_cast(31.0 / 720.0) * SQR(SQR(x)) - + static_cast(23.0 / 3600.0) * CUBE(SQR(x)) + + static_cast(1.0 / 1440.0) * SQR(SQR(SQR(x))) - + static_cast(1.0 / 14400.0) * SQR(SQR(SQR(x))) * SQR(x) + + static_cast(1.0 / 86400.0) * SQR(SQR(SQR(x))) * SQR(x) * x; + } else if (x <= TWO) { + return static_cast(65521.0 / 166320.0) - + static_cast(11.0 / 50400.0) * x - + static_cast(563.0 / 3024.0) * SQR(x) - + static_cast(11.0 / 3360.0) * CUBE(x) + + static_cast(25.0 / 504.0) * SQR(SQR(x)) - + static_cast(11.0 / 1200.0) * CUBE(x) * SQR(x) + + static_cast(1.0 / 360.0) * SQR(CUBE(x)) - + static_cast(11.0 / 1680.0) * SQR(CUBE(x)) * x + + static_cast(1.0 / 252.0) * SQR(SQR(SQR(x))) - + static_cast(11.0 / 10080.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 6720.0) * SQR(SQR(SQR(x))) * SQR(x) - + static_cast(1.0 / 120960.0) * SQR(SQR(SQR(x))) * SQR(x) * x; + } else if (x < THREE) { 
+ return static_cast(61297.0 / 166320.0) + + static_cast(781.0 / 5600.0) * x - + static_cast(1619.0 / 3024.0) * SQR(x) + + static_cast(583.0 / 1120.0) * CUBE(x) - + static_cast(239.0 / 504.0) * SQR(SQR(x)) + + static_cast(143.0 / 400.0) * CUBE(x) * SQR(x) - + static_cast(13.0 / 72.0) * SQR(CUBE(x)) + + static_cast(33.0 / 560.0) * SQR(CUBE(x)) * x - + static_cast(25.0 / 2016.0) * SQR(SQR(SQR(x))) + + static_cast(11.0 / 6720.0) * SQR(SQR(SQR(x))) * x - + static_cast(1.0 / 8064.0) * SQR(SQR(SQR(x))) * SQR(x) + + static_cast(1.0 / 241920.0) * SQR(SQR(SQR(x))) * SQR(x) * x; + } else if (x <= FOUR) { + return static_cast(894727.0 / 665280.0) - + static_cast(38533.0 / 11200.0) * x + + static_cast(9385.0 / 1728.0) * SQR(x) - + static_cast(12199.0 / 2240.0) * CUBE(x) + + static_cast(1009.0 / 288.0) * SQR(SQR(x)) - + static_cast(1199.0 / 800.0) * CUBE(x) * SQR(x) + + static_cast(631.0 / 1440.0) * SQR(CUBE(x)) - + static_cast(99.0 / 1120.0) * SQR(CUBE(x)) * x + + static_cast(7.0 / 576.0) * SQR(SQR(SQR(x))) - + static_cast(11.0 / 10080.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 17280.0) * SQR(SQR(SQR(x))) * SQR(x) - + static_cast(1.0 / 725760.0) * SQR(SQR(SQR(x))) * SQR(x) * x; + } else if (x < FIVE) { + return -static_cast(18595037.0 / 3326400.0) + + static_cast(4726777.0 / 302400.0) * x - + static_cast(1113317.0 / 60480.0) * SQR(x) + + static_cast(250657.0 / 20160.0) * CUBE(x) - + static_cast(54797.0 / 10080.0) * SQR(SQR(x)) + + static_cast(11737.0 / 7200.0) * CUBE(x) * SQR(x) - + static_cast(2477.0 / 7200.0) * SQR(CUBE(x)) + + static_cast(517.0 / 10080.0) * SQR(CUBE(x)) * x - + static_cast(107.0 / 20160.0) * SQR(SQR(SQR(x))) + + static_cast(11.0 / 30240.0) * SQR(SQR(SQR(x))) * x - + static_cast(1.0 / 67200.0) * SQR(SQR(SQR(x))) * SQR(x) + + static_cast(1.0 / 3628800.0) * SQR(SQR(SQR(x))) * SQR(x) * x; + } else if (x < SIX) { + return static_cast(17496.0 / 1925.0) - + static_cast(2916.0 / 175.0) * x + + static_cast(486.0 / 35.0) * SQR(x) - + static_cast(243.0 / 35.0) * 
CUBE(x) + + static_cast(81.0 / 35.0) * SQR(SQR(x)) - + static_cast(27.0 / 50.0) * CUBE(x) * SQR(x) + + static_cast(9.0 / 100.0) * SQR(CUBE(x)) - + static_cast(3.0 / 280.0) * SQR(CUBE(x)) * x + + static_cast(1.0 / 1120.0) * SQR(SQR(SQR(x))) - + static_cast(1.0 / 20160.0) * SQR(SQR(SQR(x))) * x + + static_cast(1.0 / 604800.0) * SQR(SQR(SQR(x))) * SQR(x) - + static_cast(1.0 / 39916800.0) * SQR(SQR(SQR(x))) * SQR(x) * x; + } else { + return ZERO; + } } -} template Inline void order(const int& i, const real_t& di, int& i_min, real_t S[O + 1]) { @@ -451,16 +530,14 @@ namespace prtl_shape { S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); S[1] = static_cast(2.0 / 3.0) - SQR(HALF + di) + HALF * CUBE(HALF + di); - S[3] = static_cast(1.0 / 6.0) * - CUBE(HALF + di); + S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); S[2] = ONE - S[0] - S[1] - S[3]; } else { i_min = i - 1; S[0] = static_cast(1.0 / 6.0) * CUBE(THREE_HALFS - di); S[1] = static_cast(2.0 / 3.0) - SQR(di - HALF) + HALF * CUBE(di - HALF); - S[3] = static_cast(1.0 / 6.0) * - CUBE(HALF - di); + S[3] = static_cast(1.0 / 6.0) * CUBE(HALF - di); S[2] = ONE - S[0] - S[1] - S[3]; } } // staggered @@ -703,14 +780,14 @@ namespace prtl_shape { i_min = i - 5; #pragma unroll - for (int n = 0; n < 10; n++) { + for (int n = 0; n < 11; n++) { S[n] = S10(Kokkos::fabs(FIVE + di - static_cast(n))); } } else { i_min = i - 4; #pragma unroll - for (int n = 0; n < 10; n++) { + for (int n = 0; n < 11; n++) { S[n] = S10(Kokkos::fabs(FOUR + di - static_cast(n))); } } @@ -718,13 +795,68 @@ namespace prtl_shape { i_min = i - 5; #pragma unroll - for (int n = 0; n < 10; n++) { - S[n] = S10(Kokkos::fabs(static_cast(4.5) + - di - static_cast(n))); + for (int n = 0; n < 11; n++) { + S[n] = S10( + Kokkos::fabs(static_cast(4.5) + di - static_cast(n))); + } + } // staggered + } else if constexpr (O == 11u) { + // clang-format off + // S11(x) = + // 655177/1663200 - (809/4320) * |x|^2 + (31/720) * |x|^4 - (23/3600) * |x|^6 + // + (1/1440) * |x|^8 - 
(1/14400) * |x|^10 + (1/86400) * |x|^11 if |x| < 1 + // 65521/166320 - (11/50400) * |x| - (563/3024) * |x|^2 - (11/3360) * |x|^3 + // + (25/504) * |x|^4 - (11/1200) * |x|^5 + (1/360) * |x|^6 + // - (11/1680) * |x|^7 + (1/252) * |x|^8 - (11/10080) * |x|^9 + // + (1/6720) * |x|^10 - (1/120960) * |x|^11 if 1 ≤ |x| ≤ 2 + // 61297/166320 + (781/5600) * |x| - (1619/3024) * |x|^2 + (583/1120) * |x|^3 + // - (239/504) * |x|^4 + (143/400) * |x|^5 - (13/72) * |x|^6 + // + (33/560) * |x|^7 - (25/2016) * |x|^8 + (11/6720) * |x|^9 + // - (1/8064) * |x|^10 + (1/241920) * |x|^11 if 2 < |x| < 3 + // 894727/665280 - (38533/11200) * |x| + (9385/1728) * |x|^2 - (12199/2240) * |x|^3 + // + (1009/288) * |x|^4 - (1199/800) * |x|^5 + (631/1440) * |x|^6 + // - (99/1120) * |x|^7 + (7/576) * |x|^8 - (11/10080) * |x|^9 + // + (1/17280) * |x|^10 - (1/725760) * |x|^11 if 3 ≤ |x| ≤ 4 + // -18595037/3326400 + (4726777/302400) * |x| - (1113317/60480) * |x|^2 + (250657/20160) * |x|^3 + // - (54797/10080) * |x|^4 + (11737/7200) * |x|^5 - (2477/7200) * |x|^6 + // + (517/10080) * |x|^7 - (107/20160) * |x|^8 + (11/30240) * |x|^9 + // - (1/67200) * |x|^10 + (1/3628800) * |x|^11 if 4 < |x| < 5 + // 17496/1925 - (2916/175) * |x| + (486/35) * |x|^2 - (243/35) * |x|^3 + // + (81/35) * |x|^4 - (27/50) * |x|^5 + (9/100) * |x|^6 + // - (3/280) * |x|^7 + (1/1120) * |x|^8 - (1/20160) * |x|^9 + // + (1/604800) * |x|^10 - (1/39916800) * |x|^11 if 5 ≤ |x| < 6 + // 0.0 otherwise + // clang-format on + if constexpr (not STAGGERED) { // compute at i positions + i_min = i - 5; + +#pragma unroll + for (int n = 0; n < 12; n++) { + S[n] = S11(Kokkos::fabs(FIVE + di - static_cast(n))); + } + } else { // compute at i + 1/2 positions + if (di < HALF) { + i_min = i - 6; + + for (int n = 0; n < 12; n++) { + S[n] = S11(Kokkos::fabs( + static_cast(5.5) + di - static_cast(n))); + } + } else { + i_min = i - 5; + +#pragma unroll + for (int n = 0; n < 12; n++) { + S[n] = S11(Kokkos::fabs( + static_cast(4.5) + di - static_cast(n))); 
+ } } } // staggered } else { - raise::KernelError(HERE, "Unsupported interpolation order. O > 10 not supported. Seriously. What are you even doing here?"); + raise::KernelError( + HERE, + "Unsupported interpolation order. O > 11 not supported. Seriously. " + "What are you even doing here? Entity already goes to 11!"); } } From 65cbba694e68a4d69600d5ef8ccb65738be56a6c Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Thu, 9 Oct 2025 14:16:59 -0500 Subject: [PATCH 087/154] revert to unoptimized version for S3 for further testing --- src/kernels/particle_shapes.hpp | 53 ++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/src/kernels/particle_shapes.hpp b/src/kernels/particle_shapes.hpp index c5d5748de..ded47ec59 100644 --- a/src/kernels/particle_shapes.hpp +++ b/src/kernels/particle_shapes.hpp @@ -518,27 +518,52 @@ namespace prtl_shape { // 2/3 - x^2 + 1/2 * x^3 |x| < 1 // S(x) = 1/6 * (2 - |x|)^3 1 ≤ |x| < 2 // 0.0 |x| ≥ 2 + // if constexpr (not STAGGERED) { // compute at i positions + // i_min = i - 1; + // S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); + // S[1] = static_cast(2.0 / 3.0) - SQR(di) + HALF * CUBE(di); + // S[3] = static_cast(1.0 / 6.0) * CUBE(di); + // S[2] = ONE - S[0] - S[1] - S[3]; + // } else { // compute at i + 1/2 positions + // if (di < HALF) { + // i_min = i - 2; + // S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); + // S[1] = static_cast(2.0 / 3.0) - SQR(HALF + di) + + // HALF * CUBE(HALF + di); + // S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); + // S[2] = ONE - S[0] - S[1] - S[3]; + // } else { + // i_min = i - 1; + // S[0] = static_cast(1.0 / 6.0) * CUBE(THREE_HALFS - di); + // S[1] = static_cast(2.0 / 3.0) - SQR(di - HALF) + + // HALF * CUBE(di - HALF); + // S[3] = static_cast(1.0 / 6.0) * CUBE(HALF - di); + // S[2] = ONE - S[0] - S[1] - S[3]; + // } + // } // staggered if constexpr (not STAGGERED) { // compute at i positions i_min = i - 1; - S[0] = static_cast(1.0 / 6.0) * CUBE(ONE - di); 
- S[1] = static_cast(2.0 / 3.0) - SQR(di) + HALF * CUBE(di); - S[3] = static_cast(1.0 / 6.0) * CUBE(di); - S[2] = ONE - S[0] - S[1] - S[3]; + +#pragma unroll + for (int n = 0; n < 4; n++) { + S[n] = S3(Kokkos::fabs(ONE + di - static_cast(n))); + } } else { // compute at i + 1/2 positions if (di < HALF) { i_min = i - 2; - S[0] = static_cast(1.0 / 6.0) * CUBE(HALF - di); - S[1] = static_cast(2.0 / 3.0) - SQR(HALF + di) + - HALF * CUBE(HALF + di); - S[3] = static_cast(1.0 / 6.0) * CUBE(HALF + di); - S[2] = ONE - S[0] - S[1] - S[3]; + +#pragma unroll + for (int n = 0; n < 4; n++) { + S[n] = S3(Kokkos::fabs( + static_cast(1.5) + di - static_cast(n))); + } } else { i_min = i - 1; - S[0] = static_cast(1.0 / 6.0) * CUBE(THREE_HALFS - di); - S[1] = static_cast(2.0 / 3.0) - SQR(di - HALF) + - HALF * CUBE(di - HALF); - S[3] = static_cast(1.0 / 6.0) * CUBE(HALF - di); - S[2] = ONE - S[0] - S[1] - S[3]; + +#pragma unroll + for (int n = 0; n < 4; n++) { + S[n] = S3(Kokkos::fabs(HALF + di - static_cast(n))); + } } } // staggered } else if constexpr (O == 4u) { From 1971c1d8ce8f0b8454f7735a013a2b4332712553 Mon Sep 17 00:00:00 2001 From: hayk Date: Tue, 21 Oct 2025 01:55:55 -0400 Subject: [PATCH 088/154] particle checkpoints refactored + added extra vars --- input.example.toml | 13 +- src/checkpoint/reader.cpp | 30 +- src/checkpoint/reader.h | 11 +- src/checkpoint/tests/checkpoint-nompi.cpp | 50 ++- src/checkpoint/writer.cpp | 247 ++++++----- src/checkpoint/writer.h | 30 +- src/framework/CMakeLists.txt | 1 + src/framework/containers/particles.cpp | 31 +- src/framework/containers/particles.h | 53 ++- src/framework/containers/particles_io.cpp | 482 ++++++++++++++++++++++ src/framework/containers/species.h | 50 ++- src/framework/domain/checkpoint.cpp | 337 +-------------- src/framework/parameters.cpp | 27 +- src/framework/tests/parameters.cpp | 25 +- src/framework/tests/particles.cpp | 53 ++- src/output/CMakeLists.txt | 2 + src/output/tests/writer-nompi.cpp | 29 +- 
src/output/utils/attr_writer.h | 1 + src/output/utils/interpret_prompt.h | 4 +- src/output/utils/readers.cpp | 99 +++++ src/output/utils/readers.h | 48 +++ src/output/utils/writers.cpp | 94 +++++ src/output/utils/writers.h | 55 +++ 23 files changed, 1216 insertions(+), 556 deletions(-) create mode 100644 src/framework/containers/particles_io.cpp create mode 100644 src/output/utils/readers.cpp create mode 100644 src/output/utils/readers.h create mode 100644 src/output/utils/writers.cpp create mode 100644 src/output/utils/writers.h diff --git a/input.example.toml b/input.example.toml index 028102b08..083544647 100644 --- a/input.example.toml +++ b/input.example.toml @@ -302,10 +302,19 @@ # @default: "Boris" [massive]; "Photon" [massless] # @enum: "Boris", "Vay", "Boris,GCA", "Vay,GCA", "Photon", "None" pusher = "" - # Number of additional (payload) variables for each particle of the given species + # Number of additional real-valued variables (payloads) for each particle of the given species # @type: ushort # @default: 0 - n_payloads = "" + n_payloads_real = "" + # Number of additional integer-valued variables (payloads) for each particle of the given species + # @type: ushort + # @default: 0 + # @note: If tracking is enabled, one or two extra integer payloads are reserved (depending on whether MPI is enabled) + n_payloads_int = "" + # Enable tracking of particles using indices for the given species + # @type: bool + # @default: false + tracking = "" # Radiation reaction to use for the species # @type: string # @default: "None" diff --git a/src/checkpoint/reader.cpp b/src/checkpoint/reader.cpp index d973b0ddd..b5d6f0c44 100644 --- a/src/checkpoint/reader.cpp +++ b/src/checkpoint/reader.cpp @@ -108,15 +108,20 @@ namespace checkpoint { } } + template void ReadParticlePayloads(adios2::IO& io, adios2::Engine& reader, + const std::string& suffix, spidx_t s, - array_t& array, + array_t& array, std::size_t nplds, npart_t count, npart_t offset) { - 
logger::Checkpoint(fmt::format("Reading quantity: s%d_plds", s + 1), HERE); - auto var = io.InquireVariable(fmt::format("s%d_plds", s + 1)); + logger::Checkpoint( + fmt::format("Reading quantity: s%d_pld_%s", s + 1, suffix.c_str()), + HERE); + auto var = io.InquireVariable( + fmt::format("s%d_pld_%s", s + 1, suffix.c_str())); if (var) { var.SetSelection(adios2::Box({ offset, 0 }, { count, nplds })); const auto slice = range_tuple_t(0, count); @@ -126,7 +131,9 @@ namespace checkpoint { adios2::Mode::Sync); Kokkos::deep_copy(array, array_h); } else { - raise::Error(fmt::format("Variable: s%d_plds not found", s + 1), HERE); + raise::Error( + fmt::format("Variable: s%d_pld_%s not found", s + 1, suffix.c_str()), + HERE); } } @@ -158,4 +165,19 @@ namespace checkpoint { CHECKPOINT_PARTICLE_DATA(short) #undef CHECKPOINT_PARTICLE_DATA +#define CHECKPOINT_PARTICLE_PAYLOADS(T) \ + template void ReadParticlePayloads(adios2::IO&, \ + adios2::Engine&, \ + const std::string&, \ + spidx_t, \ + array_t&, \ + std::size_t, \ + npart_t, \ + npart_t); + CHECKPOINT_PARTICLE_PAYLOADS(int) + CHECKPOINT_PARTICLE_PAYLOADS(float) + CHECKPOINT_PARTICLE_PAYLOADS(double) + CHECKPOINT_PARTICLE_PAYLOADS(npart_t) +#undef CHECKPOINT_PARTICLE_DATA + } // namespace checkpoint diff --git a/src/checkpoint/reader.h b/src/checkpoint/reader.h index 7939ba82b..78cc73eb7 100644 --- a/src/checkpoint/reader.h +++ b/src/checkpoint/reader.h @@ -30,11 +30,8 @@ namespace checkpoint { const adios2::Box&, ndfield_t&); - auto ReadParticleCount(adios2::IO&, - adios2::Engine&, - spidx_t, - std::size_t, - std::size_t) -> std::pair; + auto ReadParticleCount(adios2::IO&, adios2::Engine&, spidx_t, std::size_t, std::size_t) + -> std::pair; template void ReadParticleData(adios2::IO&, @@ -45,10 +42,12 @@ namespace checkpoint { npart_t, npart_t); + template void ReadParticlePayloads(adios2::IO&, adios2::Engine&, + const std::string&, spidx_t, - array_t&, + array_t&, std::size_t, npart_t, npart_t); diff --git 
a/src/checkpoint/tests/checkpoint-nompi.cpp b/src/checkpoint/tests/checkpoint-nompi.cpp index 132a3679a..7be41ca3f 100644 --- a/src/checkpoint/tests/checkpoint-nompi.cpp +++ b/src/checkpoint/tests/checkpoint-nompi.cpp @@ -50,6 +50,11 @@ auto main(int argc, char* argv[]) -> int { array_t i2 { "i_2", npart2 }; array_t u2 { "u_2", npart2 }; + array_t pldr_2 { "pldr_2", npart2, 2 }; + + array_t pldi_1 { "pldi_1", npart1, 1 }; + array_t pldi_2 { "pldi_2", npart2, 2 }; + { // fill data Kokkos::parallel_for( @@ -77,15 +82,20 @@ auto main(int argc, char* argv[]) -> int { "fillPrtl1", npart1, Lambda(index_t p) { - u1(p) = static_cast(p); - i1(p) = static_cast(p); + u1(p) = static_cast(p); + i1(p) = static_cast(p); + pldi_1(p, 0) = static_cast(p * 10); }); Kokkos::parallel_for( "fillPrtl2", npart2, Lambda(index_t p) { - u2(p) = -static_cast(p); - i2(p) = -static_cast(p); + u2(p) = -static_cast(p); + i2(p) = -static_cast(p); + pldr_2(p, 0) = static_cast(p); + pldr_2(p, 1) = static_cast(p * 2); + pldi_2(p, 0) = static_cast(p * 3); + pldi_2(p, 1) = static_cast(p * 4); }); } @@ -101,7 +111,7 @@ auto main(int argc, char* argv[]) -> int { { nx1_gh, nx2_gh, nx3_gh }, { 0, 0, 0 }, { nx1_gh, nx2_gh, nx3_gh }); - writer.defineParticleVariables(Coord::Sph, Dim::_3D, 2, { 0, 2 }); + writer.defineParticleVariables(Coord::Sph, Dim::_3D, 2, { 0, 2 }, { 1, 2 }); writer.beginSaving(0, 0.0); @@ -116,6 +126,11 @@ auto main(int argc, char* argv[]) -> int { writer.saveParticleQuantity("s2_i1", npart2, 0, npart2, i2); writer.saveParticleQuantity("s2_ux1", npart2, 0, npart2, u2); + writer.saveParticlePayloads("s2_pld_r", 2, npart2, 0, npart2, pldr_2); + + writer.saveParticlePayloads("s1_pld_i", 1, npart1, 0, npart1, pldi_1); + writer.saveParticlePayloads("s2_pld_i", 2, npart2, 0, npart2, pldi_2); + writer.endSaving(); } @@ -129,6 +144,11 @@ auto main(int argc, char* argv[]) -> int { array_t i2_read { "i_2", npart2 }; array_t u2_read { "u_2", npart2 }; + array_t pldr_2_read { "pldr_2", npart2, 2 }; 
+ + array_t pldi_1_read { "pldi_1", npart1, 1 }; + array_t pldi_2_read { "pldi_2", npart2, 2 }; + adios2::IO io = adios.DeclareIO("checkpointRead"); adios2::Engine reader = io.Open(checkpoint_path / "step-00000000.bp", adios2::Mode::Read); @@ -147,6 +167,11 @@ auto main(int argc, char* argv[]) -> int { ReadParticleData(io, reader, "i1", 0, i1_read, nprtl1, noff1); ReadParticleData(io, reader, "i1", 1, i2_read, nprtl2, noff2); + ReadParticlePayloads(io, reader, "r", 1, pldr_2_read, 2, nprtl2, noff2); + + ReadParticlePayloads(io, reader, "i", 0, pldi_1_read, 1, nprtl1, noff1); + ReadParticlePayloads(io, reader, "i", 1, pldi_2_read, 2, nprtl2, noff2); + reader.EndStep(); reader.Close(); @@ -182,6 +207,9 @@ auto main(int argc, char* argv[]) -> int { if (i1(p) != i1_read(p)) { raise::KernelError(HERE, "i1 read failed"); } + if (pldi_1(p, 0) != pldi_1_read(p, 0)) { + raise::KernelError(HERE, "pldi_1 read failed"); + } }); Kokkos::parallel_for( "checkPrtl2", @@ -193,6 +221,18 @@ auto main(int argc, char* argv[]) -> int { if (i2(p) != i2_read(p)) { raise::KernelError(HERE, "i2 read failed"); } + if (not cmp::AlmostEqual(pldr_2(p, 0), pldr_2_read(p, 0))) { + raise::KernelError(HERE, "pldr_2(0) read failed"); + } + if (not cmp::AlmostEqual(pldr_2(p, 1), pldr_2_read(p, 1))) { + raise::KernelError(HERE, "pldr_2(1) read failed"); + } + if (pldi_2(p, 0) != pldi_2_read(p, 0)) { + raise::KernelError(HERE, "pldi_2(0) read failed"); + } + if (pldi_2(p, 1) != pldi_2_read(p, 1)) { + raise::KernelError(HERE, "pldi_2(1) read failed"); + } }); } diff --git a/src/checkpoint/writer.cpp b/src/checkpoint/writer.cpp index b766ddfbd..ebb8663ae 100644 --- a/src/checkpoint/writer.cpp +++ b/src/checkpoint/writer.cpp @@ -75,68 +75,84 @@ namespace checkpoint { } } - void Writer::defineParticleVariables(const ntt::Coord& C, - Dimension dim, - std::size_t nspec, - const std::vector& nplds) { - raise::ErrorIf(nplds.size() != nspec, - "Number of payloads does not match the number of species", - HERE); 
- for (auto s { 0u }; s < nspec; ++s) { - m_io.DefineVariable(fmt::format("s%d_npart", s + 1), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); - - for (auto d { 0u }; d < dim; ++d) { - m_io.DefineVariable(fmt::format("s%d_i%d", s + 1, d + 1), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); - m_io.DefineVariable(fmt::format("s%d_dx%d", s + 1, d + 1), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); - m_io.DefineVariable(fmt::format("s%d_i%d_prev", s + 1, d + 1), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); - m_io.DefineVariable(fmt::format("s%d_dx%d_prev", s + 1, d + 1), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); - } - - if (dim == Dim::_2D and C != ntt::Coord::Cart) { - m_io.DefineVariable(fmt::format("s%d_phi", s + 1), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); - } - - for (auto d { 0u }; d < 3; ++d) { - m_io.DefineVariable(fmt::format("s%d_ux%d", s + 1, d + 1), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); - } - - m_io.DefineVariable(fmt::format("s%d_tag", s + 1), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); - m_io.DefineVariable(fmt::format("s%d_weight", s + 1), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); - if (nplds[s] > 0) { - m_io.DefineVariable(fmt::format("s%d_plds", s + 1), - { adios2::UnknownDim, nplds[s] }, - { adios2::UnknownDim, 0 }, - { adios2::UnknownDim, nplds[s] }); - } - } - } + // void Writer::defineParticleVariables(const ntt::Coord& C, + // Dimension dim, + // std::size_t nspec, + // const std::vector& npld_r, + // const std::vector& npld_i) { + // raise::ErrorIf( + // npld_r.size() != nspec, + // "Number of real payloads does not match the number of species", + // HERE); + // raise::ErrorIf( + // npld_i.size() != nspec, + // "Number of int payloads 
does not match the number of species", + // HERE); + // for (auto s { 0u }; s < nspec; ++s) { + // m_io.DefineVariable(fmt::format("s%d_npart", s + 1), + // { adios2::UnknownDim }, + // { adios2::UnknownDim }, + // { adios2::UnknownDim }); + // m_io.DefineVariable(fmt::format("s%d_counter", s + 1), + // { adios2::UnknownDim }, + // { adios2::UnknownDim }, + // { adios2::UnknownDim }); + // + // for (auto d { 0u }; d < dim; ++d) { + // m_io.DefineVariable(fmt::format("s%d_i%d", s + 1, d + 1), + // { adios2::UnknownDim }, + // { adios2::UnknownDim }, + // { adios2::UnknownDim }); + // m_io.DefineVariable(fmt::format("s%d_dx%d", s + 1, d + 1), + // { adios2::UnknownDim }, + // { adios2::UnknownDim }, + // { adios2::UnknownDim }); + // m_io.DefineVariable(fmt::format("s%d_i%d_prev", s + 1, d + 1), + // { adios2::UnknownDim }, + // { adios2::UnknownDim }, + // { adios2::UnknownDim }); + // m_io.DefineVariable(fmt::format("s%d_dx%d_prev", s + 1, d + 1), + // { adios2::UnknownDim }, + // { adios2::UnknownDim }, + // { adios2::UnknownDim }); + // } + // + // if (dim == Dim::_2D and C != ntt::Coord::Cart) { + // m_io.DefineVariable(fmt::format("s%d_phi", s + 1), + // { adios2::UnknownDim }, + // { adios2::UnknownDim }, + // { adios2::UnknownDim }); + // } + // + // for (auto d { 0u }; d < 3; ++d) { + // m_io.DefineVariable(fmt::format("s%d_ux%d", s + 1, d + 1), + // { adios2::UnknownDim }, + // { adios2::UnknownDim }, + // { adios2::UnknownDim }); + // } + // + // m_io.DefineVariable(fmt::format("s%d_tag", s + 1), + // { adios2::UnknownDim }, + // { adios2::UnknownDim }, + // { adios2::UnknownDim }); + // m_io.DefineVariable(fmt::format("s%d_weight", s + 1), + // { adios2::UnknownDim }, + // { adios2::UnknownDim }, + // { adios2::UnknownDim }); + // if (npld_r[s] > 0) { + // m_io.DefineVariable(fmt::format("s%d_pld_r", s + 1), + // { adios2::UnknownDim, npld_r[s] }, + // { adios2::UnknownDim, 0 }, + // { adios2::UnknownDim, npld_r[s] }); + // } + // if (npld_i[s] > 0) { + 
// m_io.DefineVariable(fmt::format("s%d_pld_i", s + 1), + // { adios2::UnknownDim, npld_i[s] }, + // { adios2::UnknownDim, 0 }, + // { adios2::UnknownDim, npld_i[s] }); + // } + // } + // } auto Writer::shouldSave(timestep_t step, simtime_t time) -> bool { return m_enabled and m_tracker.shouldWrite(step, time); @@ -193,17 +209,6 @@ namespace checkpoint { }); } - template - void Writer::savePerDomainVariable(const std::string& varname, - std::size_t total, - std::size_t offset, - T data) { - auto var = m_io.InquireVariable(varname); - var.SetShape({ total }); - var.SetSelection(adios2::Box({ offset }, { 1 })); - m_writer.Put(var, &data); - } - void Writer::saveAttrs(const ntt::SimulationParams& params, simtime_t time) { CallOnce([&]() { std::ofstream metadata; @@ -228,53 +233,43 @@ namespace checkpoint { adios2::Mode::Sync); } - template - void Writer::saveParticleQuantity(const std::string& quantity, - npart_t glob_total, - npart_t loc_offset, - npart_t loc_size, - const array_t& data) { - const auto slice = range_tuple_t(0, loc_size); - auto var = m_io.InquireVariable(quantity); - - var.SetShape({ glob_total }); - var.SetSelection(adios2::Box({ loc_offset }, { loc_size })); - - auto data_h = Kokkos::create_mirror_view(data); - Kokkos::deep_copy(data_h, data); - auto data_sub = Kokkos::subview(data_h, slice); - m_writer.Put(var, data_sub.data(), adios2::Mode::Sync); - } - - void Writer::saveParticlePayloads(const std::string& quantity, - std::size_t nplds, - npart_t glob_total, - npart_t loc_offset, - npart_t loc_size, - const array_t& data) { - const auto slice = range_tuple_t(0, loc_size); - auto var = m_io.InquireVariable(quantity); - - var.SetShape({ glob_total, nplds }); - var.SetSelection( - adios2::Box({ loc_offset, 0 }, { loc_size, nplds })); - - auto data_h = Kokkos::create_mirror_view(data); - Kokkos::deep_copy(data_h, data); - auto data_sub = Kokkos::subview(data_h, slice, range_tuple_t(0, nplds)); - m_writer.Put(var, data_sub.data(), 
adios2::Mode::Sync); - } - -#define CHECKPOINT_PERDOMAIN_VARIABLE(T) \ - template void Writer::savePerDomainVariable(const std::string&, \ - std::size_t, \ - std::size_t, \ - T); - CHECKPOINT_PERDOMAIN_VARIABLE(int) - CHECKPOINT_PERDOMAIN_VARIABLE(float) - CHECKPOINT_PERDOMAIN_VARIABLE(double) - CHECKPOINT_PERDOMAIN_VARIABLE(npart_t) -#undef CHECKPOINT_PERDOMAIN_VARIABLE + // template + // void Writer::saveParticleQuantity(const std::string& quantity, + // npart_t glob_total, + // npart_t loc_offset, + // npart_t loc_size, + // const array_t& data) { + // const auto slice = range_tuple_t(0, loc_size); + // auto var = m_io.InquireVariable(quantity); + // + // var.SetShape({ glob_total }); + // var.SetSelection(adios2::Box({ loc_offset }, { loc_size })); + // + // auto data_h = Kokkos::create_mirror_view(data); + // Kokkos::deep_copy(data_h, data); + // auto data_sub = Kokkos::subview(data_h, slice); + // m_writer.Put(var, data_sub.data(), adios2::Mode::Sync); + // } + // + // template + // void Writer::saveParticlePayloads(const std::string& quantity, + // std::size_t nplds, + // npart_t glob_total, + // npart_t loc_offset, + // npart_t loc_size, + // const array_t& data) { + // const auto slice = range_tuple_t(0, loc_size); + // auto var = m_io.InquireVariable(quantity); + // + // var.SetShape({ glob_total, nplds }); + // var.SetSelection( + // adios2::Box({ loc_offset, 0 }, { loc_size, nplds })); + // + // auto data_h = Kokkos::create_mirror_view(data); + // Kokkos::deep_copy(data_h, data); + // auto data_sub = Kokkos::subview(data_h, slice, range_tuple_t(0, nplds)); + // m_writer.Put(var, data_sub.data(), adios2::Mode::Sync); + // } #define CHECKPOINT_FIELD(D, N) \ template void Writer::saveField(const std::string&, \ @@ -287,16 +282,4 @@ namespace checkpoint { CHECKPOINT_FIELD(Dim::_3D, 6) #undef CHECKPOINT_FIELD -#define CHECKPOINT_PARTICLE_QUANTITY(T) \ - template void Writer::saveParticleQuantity(const std::string&, \ - npart_t, \ - npart_t, \ - npart_t, \ - 
const array_t&); - CHECKPOINT_PARTICLE_QUANTITY(int) - CHECKPOINT_PARTICLE_QUANTITY(float) - CHECKPOINT_PARTICLE_QUANTITY(double) - CHECKPOINT_PARTICLE_QUANTITY(short) -#undef CHECKPOINT_PARTICLE_QUANTITY - } // namespace checkpoint diff --git a/src/checkpoint/writer.h b/src/checkpoint/writer.h index 6f8bc8cb5..2750e2226 100644 --- a/src/checkpoint/writer.h +++ b/src/checkpoint/writer.h @@ -62,35 +62,23 @@ namespace checkpoint { void saveAttrs(const ntt::SimulationParams&, simtime_t); - template - void savePerDomainVariable(const std::string&, std::size_t, std::size_t, T); - template void saveField(const std::string&, const ndfield_t&); - template - void saveParticleQuantity(const std::string&, - npart_t, - npart_t, - npart_t, - const array_t&); - - void saveParticlePayloads(const std::string&, - std::size_t, - npart_t, - npart_t, - npart_t, - const array_t&); - void defineFieldVariables(const ntt::SimEngine&, const std::vector&, const std::vector&, const std::vector&); - void defineParticleVariables(const ntt::Coord&, - Dimension, - std::size_t, - const std::vector&); + [[nodiscard]] + auto io() -> adios2::IO& { + return m_io; + } + + [[nodiscard]] + auto writer() -> adios2::Engine& { + return m_writer; + } [[nodiscard]] auto enabled() const -> bool { diff --git a/src/framework/CMakeLists.txt b/src/framework/CMakeLists.txt index b74d11bec..07328fff6 100644 --- a/src/framework/CMakeLists.txt +++ b/src/framework/CMakeLists.txt @@ -47,6 +47,7 @@ set(SOURCES if(${output}) list(APPEND SOURCES ${SRC_DIR}/domain/output.cpp) list(APPEND SOURCES ${SRC_DIR}/domain/checkpoint.cpp) + list(APPEND SOURCES ${SRC_DIR}/containers/particles_io.cpp) endif() add_library(ntt_framework ${SOURCES}) diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index d2db9c491..e9e516221 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -22,10 +22,22 @@ namespace ntt { float ch, npart_t maxnpart, const 
PrtlPusher& pusher, + bool use_tracking, bool use_gca, const Cooling& cooling, - unsigned short npld) - : ParticleSpecies(index, label, m, ch, maxnpart, pusher, use_gca, cooling, npld) { + unsigned short npld_r, + unsigned short npld_i) + : ParticleSpecies(index, + label, + m, + ch, + maxnpart, + pusher, + use_tracking, + use_gca, + cooling, + npld_r, + npld_i) { if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { i1 = array_t { label + "_i1", maxnpart }; @@ -56,8 +68,11 @@ namespace ntt { tag = array_t { label + "_tag", maxnpart }; - if (npld > 0) { - pld = array_t { label + "_pld", maxnpart, npld }; + if (npld_r > 0) { + pld_r = array_t { label + "_pld_r", maxnpart, npld_r }; + } + if (npld_i > 0) { + pld_i = array_t { label + "_pld_i", maxnpart, npld_i }; } if ((D == Dim::_2D) && (C != Coord::Cart)) { @@ -206,8 +221,12 @@ namespace ntt { RemoveDeadInArray(phi, indices_alive); } - if (npld() > 0) { - RemoveDeadInArray(pld, indices_alive); + if (npld_r() > 0) { + RemoveDeadInArray(pld_r, indices_alive); + } + + if (npld_i() > 0) { + RemoveDeadInArray(pld_i, indices_alive); } Kokkos::Experimental::fill( diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index 8ff74be33..c85759035 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -23,6 +23,10 @@ #include +#if defined(OUTPUT_ENABLED) + #include +#endif + #include #include @@ -38,6 +42,7 @@ namespace ntt { private: // Number of currently active (used) particles npart_t m_npart { 0 }; + npart_t m_counter { 0 }; bool m_is_sorted { false }; #if !defined(MPI_ENABLED) @@ -60,8 +65,10 @@ namespace ntt { array_t dx1_prev, dx2_prev, dx3_prev; // Array to tag the particles array_t tag; - // Array to store the particle payloads - array_t pld; + // Array to store real-valued payloads + array_t pld_r; + // Array to store integer-valued payloads + array_t pld_i; // phi coordinate (for axisymmetry) array_t phi; @@ -76,9 +83,11 @@ 
namespace ntt { * @param ch The charge of the species * @param maxnpart The maximum number of allocated particles for the species * @param pusher The pusher assigned for the species + * @param use_tracking Use particle tracking for the species * @param use_gca Use hybrid GCA pusher for the species * @param cooling The cooling mechanism assigned for the species - * @param npld The number of payloads for the species + * @param npld_r The number of real-valued payloads for the species + * @param npld_i The number of integer-valued payloads for the species */ Particles(spidx_t index, const std::string& label, @@ -87,8 +96,10 @@ namespace ntt { npart_t maxnpart, const PrtlPusher& pusher, bool use_gca, + bool use_tracking, const Cooling& cooling, - unsigned short npld = 0); + unsigned short npld_r = 0, + unsigned short npld_i = 0); /** * @brief Constructor for the particle container @@ -102,9 +113,11 @@ namespace ntt { spec.charge(), spec.maxnpart(), spec.pusher(), + spec.use_tracking(), spec.use_gca(), spec.cooling(), - spec.npld()) {} + spec.npld_r(), + spec.npld_i()) {} Particles(const Particles&) = delete; Particles& operator=(const Particles&) = delete; @@ -136,6 +149,17 @@ namespace ntt { return m_npart; } + /** + * @brief Get the particle counter + */ + [[nodiscard]] + auto counter() const -> npart_t { + return m_counter; + } + + /** + * @brief Check if particles are sorted by tag + */ [[nodiscard]] auto is_sorted() const -> bool { return m_is_sorted; @@ -169,8 +193,9 @@ namespace ntt { footprint += sizeof(prtldx_t) * dx2_prev.extent(0); footprint += sizeof(prtldx_t) * dx3_prev.extent(0); footprint += sizeof(short) * tag.extent(0); - footprint += sizeof(real_t) * pld.extent(0) * pld.extent(1); - footprint += sizeof(real_t) * phi.extent(0); + footprint += sizeof(real_t) * pld_r.extent(0) * pld_r.extent(1); + footprint += sizeof(npart_t) * pld_i.extent(0) * pld_i.extent(1); + footprint += sizeof(real_t) * phi.extent(0); return footprint; } @@ -206,6 +231,14 @@ 
namespace ntt { m_npart = n; } + /** + * @brief Set the particle counter + * @param n The counter value as a npart_t + */ + void set_counter(npart_t n) { + m_counter = n; + } + void set_unsorted() { m_is_sorted = false; } @@ -220,7 +253,11 @@ namespace ntt { */ void SyncHostDevice(); - // void PrintTags(); +#if defined(OUTPUT_ENABLED) + void CheckpointDeclare(adios2::IO&) const; + void CheckpointRead(adios2::IO&, adios2::Engine&, std::size_t, std::size_t); + void CheckpointWrite(adios2::IO&, adios2::Engine&, std::size_t, std::size_t) const; +#endif }; } // namespace ntt diff --git a/src/framework/containers/particles_io.cpp b/src/framework/containers/particles_io.cpp new file mode 100644 index 000000000..870a6e0ae --- /dev/null +++ b/src/framework/containers/particles_io.cpp @@ -0,0 +1,482 @@ +#include "enums.h" +#include "global.h" + +#include "utils/error.h" +#include "utils/formatting.h" +#include "utils/log.h" + +#include "framework/containers/particles.h" +#include "output/utils/readers.h" +#include "output/utils/writers.h" + +#include + +#if defined(MPI_ENABLED) + #include +#endif + +namespace ntt { + + template + void Particles::CheckpointDeclare(adios2::IO& io) const { + logger::Checkpoint( + fmt::format("Declaring particle checkpoint for species #%d", index()), + HERE); + io.DefineVariable(fmt::format("s%d_npart", index()), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); + io.DefineVariable(fmt::format("s%d_counter", index()), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); + for (auto d { 0u }; d < static_cast(D); ++d) { + io.DefineVariable(fmt::format("s%d_i%d", index(), d + 1), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); + io.DefineVariable(fmt::format("s%d_dx%d", index(), d + 1), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); + io.DefineVariable(fmt::format("s%d_i%d_prev", index(), d + 1), + { adios2::UnknownDim }, + { 
adios2::UnknownDim }, + { adios2::UnknownDim }); + io.DefineVariable(fmt::format("s%d_dx%d_prev", index(), d + 1), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); + } + + if constexpr (D == Dim::_2D and C != ntt::Coord::Cart) { + io.DefineVariable(fmt::format("s%d_phi", index()), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); + } + + for (auto d { 0u }; d < 3; ++d) { + io.DefineVariable(fmt::format("s%d_ux%d", index(), d + 1), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); + } + + io.DefineVariable(fmt::format("s%d_tag", index()), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); + io.DefineVariable(fmt::format("s%d_weight", index()), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); + if (npld_r() > 0) { + io.DefineVariable(fmt::format("s%d_pld_r", index()), + { adios2::UnknownDim, npld_r() }, + { adios2::UnknownDim, 0 }, + { adios2::UnknownDim, npld_r() }); + } + if (npld_i() > 0) { + io.DefineVariable(fmt::format("s%d_pld_i", index()), + { adios2::UnknownDim, npld_i() }, + { adios2::UnknownDim, 0 }, + { adios2::UnknownDim, npld_i() }); + } + } + + template + void Particles::CheckpointRead(adios2::IO& io, + adios2::Engine& reader, + std::size_t domains_total, + std::size_t domains_offset) { + logger::Checkpoint( + fmt::format("Reading particle checkpoint for species #%d", index()), + HERE); + raise::ErrorIf(npart() > 0, + "Particles already initialized before reading checkpoint", + HERE); + npart_t npart_offset = 0u; + + out::ReadVariable(io, + reader, + fmt::format("s%d_npart", index()), + m_npart, + domains_offset); + + raise::ErrorIf( + npart() > maxnpart(), + fmt::format("npart %d > maxnpart %d after reading checkpoint", + npart(), + maxnpart()), + HERE); + +#if defined(MPI_ENABLED) + { + std::vector glob_nparts(domains_total); + MPI_Allgather(&m_npart, + 1, + mpi::get_type(), + glob_nparts.data(), + 1, + 
mpi::get_type(), + MPI_COMM_WORLD); + for (auto d { 0u }; d < domains_offset; ++d) { + npart_offset += glob_nparts[d]; + } + } +#endif + out::ReadVariable(io, + reader, + fmt::format("s%d_counter", index()), + m_counter, + domains_offset); + + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + out::Read1DArray(io, + reader, + fmt::format("s%d_i1", index()), + i1, + npart(), + npart_offset); + out::Read1DArray(io, + reader, + fmt::format("s%d_dx1", index()), + dx1, + npart(), + npart_offset); + out::Read1DArray(io, + reader, + fmt::format("s%d_i1_prev", index()), + i1_prev, + npart(), + npart_offset); + out::Read1DArray(io, + reader, + fmt::format("s%d_dx1_prev", index()), + dx1_prev, + npart(), + npart_offset); + } + + if constexpr (D == Dim::_2D or D == Dim::_3D) { + out::Read1DArray(io, + reader, + fmt::format("s%d_i2", index()), + i2, + npart(), + npart_offset); + out::Read1DArray(io, + reader, + fmt::format("s%d_dx2", index()), + dx2, + npart(), + npart_offset); + out::Read1DArray(io, + reader, + fmt::format("s%d_i2_prev", index()), + i2_prev, + npart(), + npart_offset); + out::Read1DArray(io, + reader, + fmt::format("s%d_dx2_prev", index()), + dx2_prev, + npart(), + npart_offset); + } + + if constexpr (D == Dim::_3D) { + out::Read1DArray(io, + reader, + fmt::format("s%d_i3", index()), + i3, + npart(), + npart_offset); + out::Read1DArray(io, + reader, + fmt::format("s%d_dx3", index()), + dx3, + npart(), + npart_offset); + out::Read1DArray(io, + reader, + fmt::format("s%d_i3_prev", index()), + i3_prev, + npart(), + npart_offset); + out::Read1DArray(io, + reader, + fmt::format("s%d_dx3_prev", index()), + dx3_prev, + npart(), + npart_offset); + } + + if constexpr (D == Dim::_2D and C != Coord::Cart) { + out::Read1DArray(io, + reader, + fmt::format("s%d_phi", index()), + phi, + npart(), + npart_offset); + } + + out::Read1DArray(io, + reader, + fmt::format("s%d_ux1", index()), + ux1, + npart(), + npart_offset); + out::Read1DArray(io, + reader, + 
fmt::format("s%d_ux2", index()), + ux2, + npart(), + npart_offset); + out::Read1DArray(io, + reader, + fmt::format("s%d_ux3", index()), + ux3, + npart(), + npart_offset); + out::Read1DArray(io, + reader, + fmt::format("s%d_tag", index()), + tag, + npart(), + npart_offset); + out::Read1DArray(io, + reader, + fmt::format("s%d_weight", index()), + weight, + npart(), + npart_offset); + + if (npld_r() > 0) { + out::Read2DArray(io, + reader, + fmt::format("s%d_pld_r", index()), + pld_r, + npld_r(), + npart(), + npart_offset); + } + + if (npld_i() > 0) { + out::Read2DArray(io, + reader, + fmt::format("s%d_pld_i", index()), + pld_i, + npld_i(), + npart(), + npart_offset); + } + } + + template + void Particles::CheckpointWrite(adios2::IO& io, + adios2::Engine& writer, + std::size_t domains_total, + std::size_t domains_offset) const { + logger::Checkpoint( + fmt::format("Writing particle checkpoint for species #%d", index()), + HERE); + + npart_t npart_offset = 0u; + npart_t npart_total = npart(); + +#if defined(MPI_ENABLED) + { + std::vector glob_nparts(domains_total); + MPI_Allgather(&m_npart, + 1, + mpi::get_type(), + glob_nparts.data(), + 1, + mpi::get_type(), + MPI_COMM_WORLD); + npart_total = 0u; + for (auto r = 0; r < domains_total; ++r) { + if (r < domains_offset) { + npart_offset += glob_nparts[r]; + } + npart_total += glob_nparts[r]; + } + } +#endif + + out::WriteVariable(io, + writer, + fmt::format("s%d_npart", index()), + npart(), + domains_total, + domains_offset); + out::WriteVariable(io, + writer, + fmt::format("s%d_counter", index()), + counter(), /* the value CheckpointRead restores into m_counter, not npart */ + domains_total, + domains_offset); + + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + out::Write1DArray(io, + writer, + fmt::format("s%d_i1", index()), + i1, + npart_total, + npart_offset); + out::Write1DArray(io, + writer, + fmt::format("s%d_dx1", index()), + dx1, + npart_total, + npart_offset); + out::Write1DArray(io, + writer, + fmt::format("s%d_i1_prev", index()), + i1_prev, + npart_total, + 
npart_offset); + out::Write1DArray(io, + writer, + fmt::format("s%d_dx1_prev", index()), + dx1_prev, + npart_total, + npart_offset); + } + + if constexpr (D == Dim::_2D or D == Dim::_3D) { + out::Write1DArray(io, + writer, + fmt::format("s%d_i2", index()), + i2, + npart_total, + npart_offset); + out::Write1DArray(io, + writer, + fmt::format("s%d_dx2", index()), + dx2, + npart_total, + npart_offset); + out::Write1DArray(io, + writer, + fmt::format("s%d_i2_prev", index()), + i2_prev, + npart_total, + npart_offset); + out::Write1DArray(io, + writer, + fmt::format("s%d_dx2_prev", index()), + dx2_prev, + npart_total, + npart_offset); + } + + if constexpr (D == Dim::_3D) { + out::Write1DArray(io, + writer, + fmt::format("s%d_i3", index()), + i3, + npart_total, + npart_offset); + out::Write1DArray(io, + writer, + fmt::format("s%d_dx3", index()), + dx3, + npart_total, + npart_offset); + out::Write1DArray(io, + writer, + fmt::format("s%d_i3_prev", index()), + i3_prev, + npart_total, + npart_offset); + out::Write1DArray(io, + writer, + fmt::format("s%d_dx3_prev", index()), + dx3_prev, + npart_total, + npart_offset); + } + + if constexpr (D == Dim::_2D and C != Coord::Cart) { + out::Write1DArray(io, + writer, + fmt::format("s%d_phi", index()), + phi, + npart_total, + npart_offset); + } + + out::Write1DArray(io, + writer, + fmt::format("s%d_ux1", index()), + ux1, + npart_total, + npart_offset); + out::Write1DArray(io, + writer, + fmt::format("s%d_ux2", index()), + ux2, + npart_total, + npart_offset); + out::Write1DArray(io, + writer, + fmt::format("s%d_ux3", index()), + ux3, + npart_total, + npart_offset); + out::Write1DArray(io, + writer, + fmt::format("s%d_tag", index()), + tag, + npart_total, + npart_offset); + out::Write1DArray(io, + writer, + fmt::format("s%d_weight", index()), + weight, + npart_total, + npart_offset); + if (npld_r() > 0) { + out::Write2DArray(io, + writer, + fmt::format("s%d_pld_r", index()), + pld_r, + npart_total, + npart_offset, + npld_r()); + } + + 
if (npld_i() > 0) { + out::Write2DArray(io, + writer, + fmt::format("s%d_pld_i", index()), + pld_i, + npart_total, + npart_offset, + npld_i()); + } + } + +#define PARTICLES_CHECKPOINTS(D, C) \ + template void Particles::CheckpointDeclare(adios2::IO&) const; \ + template void Particles::CheckpointRead(adios2::IO&, \ + adios2::Engine&, \ + std::size_t, \ + std::size_t); \ + template void Particles::CheckpointWrite(adios2::IO&, \ + adios2::Engine&, \ + std::size_t, \ + std::size_t) const; + PARTICLES_CHECKPOINTS(Dim::_1D, Coord::Cart) + PARTICLES_CHECKPOINTS(Dim::_2D, Coord::Cart) + PARTICLES_CHECKPOINTS(Dim::_3D, Coord::Cart) + PARTICLES_CHECKPOINTS(Dim::_2D, Coord::Sph) + PARTICLES_CHECKPOINTS(Dim::_2D, Coord::QSph) + PARTICLES_CHECKPOINTS(Dim::_3D, Coord::Sph) + PARTICLES_CHECKPOINTS(Dim::_3D, Coord::QSph) +#undef PARTICLES_CHECKPOINTS + +} // namespace ntt diff --git a/src/framework/containers/species.h b/src/framework/containers/species.h index ada0282e2..baf024874 100644 --- a/src/framework/containers/species.h +++ b/src/framework/containers/species.h @@ -33,6 +33,9 @@ namespace ntt { // Pusher assigned for the species const PrtlPusher m_pusher; + // Use particle tracking for the species + const bool m_use_tracking; + // Use byrid gca pusher for the species const bool m_use_gca; @@ -40,7 +43,8 @@ namespace ntt { const Cooling m_cooling; // Number of payloads for the species - const unsigned short m_npld; + const unsigned short m_npld_r; + const unsigned short m_npld_i; public: ParticleSpecies() @@ -50,9 +54,11 @@ namespace ntt { , m_charge { 0.0 } , m_maxnpart { 0 } , m_pusher { PrtlPusher::INVALID } + , m_use_tracking { false } , m_use_gca { false } , m_cooling { Cooling::INVALID } - , m_npld { 0 } {} + , m_npld_r { 0 } + , m_npld_i { 0 } {} /** * @brief Constructor for the particle species container. @@ -63,6 +69,11 @@ namespace ntt { * @param ch The charge of the species. 
* @param maxnpart The maximum number of allocated particles for the species. * @param pusher The pusher assigned for the species. + * @param use_tracking Use particle tracking for the species. + * @param use_gca Use hybrid GCA pusher for the species. + * @param cooling The cooling mechanism assigned for the species. + * @param npld_r The number of real-valued payloads for the species + * @param npld_i The number of integer-valued payloads for the species */ ParticleSpecies(spidx_t index, const std::string& label, @@ -70,18 +81,35 @@ namespace ntt { float ch, npart_t maxnpart, const PrtlPusher& pusher, + bool use_tracking, bool use_gca, const Cooling& cooling, - unsigned short npld = 0) + unsigned short npld_r = 0, + unsigned short npld_i = 0) : m_index { index } , m_label { std::move(label) } , m_mass { m } , m_charge { ch } , m_maxnpart { maxnpart } , m_pusher { pusher } + , m_use_tracking { use_tracking } , m_use_gca { use_gca } , m_cooling { cooling } - , m_npld { npld } {} + , m_npld_r { npld_r } + , m_npld_i { npld_i } { + if (use_tracking) { +#if !defined(MPI_ENABLED) + raise::ErrorIf(m_npld_i < 1, + "npld_i must be at least 1 when tracking is enabled", + HERE); +#else + raise::ErrorIf( + m_npld_i < 2, + "npld_i must be at least 2 when tracking is enabled with MPI", + HERE); +#endif + } + } ParticleSpecies(const ParticleSpecies&) = default; @@ -120,6 +148,11 @@ namespace ntt { return m_pusher; } + [[nodiscard]] + auto use_tracking() const -> bool { + return m_use_tracking; + } + [[nodiscard]] auto use_gca() const -> bool { return m_use_gca; @@ -131,8 +164,13 @@ namespace ntt { } [[nodiscard]] - auto npld() const -> unsigned short { - return m_npld; + auto npld_r() const -> unsigned short { + return m_npld_r; + } + + [[nodiscard]] + auto npld_i() const -> unsigned short { + return m_npld_i; } }; } // namespace ntt diff --git a/src/framework/domain/checkpoint.cpp b/src/framework/domain/checkpoint.cpp index e0e34f993..8fcefa989 100644 --- 
a/src/framework/domain/checkpoint.cpp +++ b/src/framework/domain/checkpoint.cpp @@ -42,9 +42,10 @@ namespace ntt { } auto loc_shape_with_ghosts = local_domain->mesh.n_all(); - std::vector nplds; + std::vector npld_r, npld_i; for (auto s { 0u }; s < local_domain->species.size(); ++s) { - nplds.push_back(local_domain->species[s].npld()); + npld_r.push_back(local_domain->species[s].npld_r()); + npld_i.push_back(local_domain->species[s].npld_i()); } const path_t checkpoint_root = params.template get( @@ -62,10 +63,9 @@ namespace ntt { glob_shape_with_ghosts, off_ncells_with_ghosts, loc_shape_with_ghosts); - g_checkpoint_writer.defineParticleVariables(M::CoordType, - M::Dim, - local_domain->species.size(), - nplds); + for (auto& species : local_domain->species) { + species.CheckpointDeclare(g_checkpoint_writer.io()); + } } } @@ -96,165 +96,17 @@ namespace ntt { g_checkpoint_writer.saveField("em0", local_domain->fields.em0); g_checkpoint_writer.saveField("cur0", local_domain->fields.cur0); } - std::size_t dom_offset = 0, dom_tot = 1; + std::size_t dom_tot = 1, dom_offset = 0; #if defined(MPI_ENABLED) - dom_offset = g_mpi_rank; dom_tot = g_mpi_size; + dom_offset = g_mpi_rank; #endif // MPI_ENABLED - for (auto s { 0u }; s < local_domain->species.size(); ++s) { - auto npart = local_domain->species[s].npart(); - npart_t offset = 0; - auto glob_tot = npart; -#if defined(MPI_ENABLED) - auto glob_npart = std::vector(g_ndomains); - MPI_Allgather(&npart, - 1, - mpi::get_type(), - glob_npart.data(), - 1, - mpi::get_type(), - MPI_COMM_WORLD); - glob_tot = 0; - for (auto r = 0; r < g_mpi_size; ++r) { - if (r < g_mpi_rank) { - offset += glob_npart[r]; - } - glob_tot += glob_npart[r]; - } -#endif // MPI_ENABLED - g_checkpoint_writer.savePerDomainVariable( - fmt::format("s%d_npart", s + 1), - dom_tot, - dom_offset, - npart); - if constexpr (M::Dim == Dim::_1D or M::Dim == Dim::_2D or - M::Dim == Dim::_3D) { - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_i1", s + 1), - 
glob_tot, - offset, - npart, - local_domain->species[s].i1); - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_dx1", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].dx1); - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_i1_prev", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].i1_prev); - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_dx1_prev", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].dx1_prev); - } - if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_i2", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].i2); - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_dx2", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].dx2); - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_i2_prev", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].i2_prev); - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_dx2_prev", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].dx2_prev); - } - if constexpr (M::Dim == Dim::_3D) { - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_i3", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].i3); - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_dx3", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].dx3); - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_i3_prev", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].i3_prev); - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_dx3_prev", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].dx3_prev); - } - if constexpr (M::Dim == Dim::_2D and M::CoordType != Coord::Cart) { - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_phi", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].phi); - } - 
g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_ux1", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].ux1); - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_ux2", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].ux2); - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_ux3", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].ux3); - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_tag", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].tag); - g_checkpoint_writer.saveParticleQuantity( - fmt::format("s%d_weight", s + 1), - glob_tot, - offset, - npart, - local_domain->species[s].weight); - - auto nplds = local_domain->species[s].npld(); - if (nplds > 0) { - g_checkpoint_writer.saveParticlePayloads(fmt::format("s%d_plds", s + 1), - nplds, - glob_tot, - offset, - npart, - local_domain->species[s].pld); - } + for (const auto& species : local_domain->species) { + species.CheckpointWrite(g_checkpoint_writer.io(), + g_checkpoint_writer.writer(), + dom_tot, + dom_offset); } } g_checkpoint_writer.endSaving(); @@ -284,8 +136,8 @@ namespace ntt { #endif reader.BeginStep(); - for (auto& ldidx : l_subdomain_indices()) { - auto& domain = g_subdomains[ldidx]; + for (const auto local_domain_idx : l_subdomain_indices()) { + auto& domain = g_subdomains[local_domain_idx]; adios2::Box range; for (auto d { 0u }; d < M::Dim; ++d) { range.first.push_back(domain.offset_ncells()[d] + @@ -315,163 +167,10 @@ namespace ntt { range3, domain.fields.cur0); } - for (auto s { 0u }; s < domain.species.size(); ++s) { - const auto [loc_npart, offset_npart] = - checkpoint::ReadParticleCount(io, reader, s, ldidx, ndomains()); - raise::ErrorIf(loc_npart > domain.species[s].maxnpart(), - "loc_npart > domain.species[s].maxnpart()", - HERE); - if (loc_npart == 0) { - continue; - } - if constexpr (M::Dim == Dim::_1D or M::Dim == Dim::_2D or - M::Dim == Dim::_3D) { - 
checkpoint::ReadParticleData(io, - reader, - "i1", - s, - domain.species[s].i1, - loc_npart, - offset_npart); - checkpoint::ReadParticleData(io, - reader, - "dx1", - s, - domain.species[s].dx1, - loc_npart, - offset_npart); - checkpoint::ReadParticleData(io, - reader, - "i1_prev", - s, - domain.species[s].i1_prev, - loc_npart, - offset_npart); - checkpoint::ReadParticleData(io, - reader, - "dx1_prev", - s, - domain.species[s].dx1_prev, - loc_npart, - offset_npart); - } - if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { - checkpoint::ReadParticleData(io, - reader, - "i2", - s, - domain.species[s].i2, - loc_npart, - offset_npart); - checkpoint::ReadParticleData(io, - reader, - "dx2", - s, - domain.species[s].dx2, - loc_npart, - offset_npart); - checkpoint::ReadParticleData(io, - reader, - "i2_prev", - s, - domain.species[s].i2_prev, - loc_npart, - offset_npart); - checkpoint::ReadParticleData(io, - reader, - "dx2_prev", - s, - domain.species[s].dx2_prev, - loc_npart, - offset_npart); - } - if constexpr (M::Dim == Dim::_3D) { - checkpoint::ReadParticleData(io, - reader, - "i3", - s, - domain.species[s].i3, - loc_npart, - offset_npart); - checkpoint::ReadParticleData(io, - reader, - "dx3", - s, - domain.species[s].dx3, - loc_npart, - offset_npart); - checkpoint::ReadParticleData(io, - reader, - "i3_prev", - s, - domain.species[s].i3_prev, - loc_npart, - offset_npart); - checkpoint::ReadParticleData(io, - reader, - "dx3_prev", - s, - domain.species[s].dx3_prev, - loc_npart, - offset_npart); - } - if constexpr (M::Dim == Dim::_2D and M::CoordType != Coord::Cart) { - checkpoint::ReadParticleData(io, - reader, - "phi", - s, - domain.species[s].phi, - loc_npart, - offset_npart); - } - checkpoint::ReadParticleData(io, - reader, - "ux1", - s, - domain.species[s].ux1, - loc_npart, - offset_npart); - checkpoint::ReadParticleData(io, - reader, - "ux2", - s, - domain.species[s].ux2, - loc_npart, - offset_npart); - checkpoint::ReadParticleData(io, - reader, - "ux3", - s, 
- domain.species[s].ux3, - loc_npart, - offset_npart); - checkpoint::ReadParticleData(io, - reader, - "tag", - s, - domain.species[s].tag, - loc_npart, - offset_npart); - checkpoint::ReadParticleData(io, - reader, - "weight", - s, - domain.species[s].weight, - loc_npart, - offset_npart); - const auto nplds = domain.species[s].npld(); - if (nplds > 0) { - checkpoint::ReadParticlePayloads(io, - reader, - s, - domain.species[s].pld, - nplds, - loc_npart, - offset_npart); - } - domain.species[s].set_npart(loc_npart); - } // species loop + for (auto& species : domain.species) { + species.CheckpointRead(io, reader, ndomains(), local_domain_idx); /* (domains_total, domains_offset) */ + } } // local subdomain loop diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index 884551e2c..27906b6e4 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -200,10 +200,21 @@ namespace ntt { const auto maxnpart_real = toml::find(sp, "maxnpart"); const auto maxnpart = static_cast(maxnpart_real); auto pusher = toml::find_or(sp, "pusher", std::string(def_pusher)); - const auto npayloads = toml::find_or(sp, - "n_payloads", - static_cast(0)); - const auto cooling = toml::find_or(sp, "cooling", std::string("None")); + const auto npayloads_real = toml::find_or(sp, + "n_payloads_real", + static_cast(0)); + const auto use_tracking = toml::find_or(sp, "tracking", false); + auto npayloads_int = toml::find_or(sp, + "n_payloads_int", + static_cast(0)); + if (use_tracking) { +#if !defined(MPI_ENABLED) + npayloads_int += 1; +#else + npayloads_int += 2; +#endif + } + const auto cooling = toml::find_or(sp, "cooling", std::string("None")); raise::ErrorIf((fmt::toLower(cooling) != "none") && is_massless, "cooling is only applicable to massive particles", HERE); @@ -241,9 +252,11 @@ namespace ntt { charge, maxnpart, pusher_enum, + use_tracking, use_gca, cooling_enum, - npayloads)); + npayloads_real, + npayloads_int)); idx += 1; } set("particles.species", species); @@ -468,9 +481,11 @@ namespace 
ntt { particle_species.charge(), maxnpart, particle_species.pusher(), + particle_species.use_tracking(), particle_species.use_gca(), particle_species.cooling(), - particle_species.npld()); + particle_species.npld_r(), + particle_species.npld_i()); idxM1++; } set("particles.species", new_species); diff --git a/src/framework/tests/parameters.cpp b/src/framework/tests/parameters.cpp index 07b2c11b3..c9475d688 100644 --- a/src/framework/tests/parameters.cpp +++ b/src/framework/tests/parameters.cpp @@ -12,7 +12,6 @@ #include #include -#include using namespace toml::literals::toml_literals; const auto mink_1d = u8R"( @@ -55,7 +54,8 @@ const auto mink_1d = u8R"( charge = -1.0 maxnpart = 1e2 pusher = "boris" - n_payloads = 3 + n_payloads_real = 3 + tracking = true [[particles.species]] label = "p+" @@ -139,7 +139,7 @@ const auto sph_2d = u8R"( charge = -1.0 maxnpart = 1e2 pusher = "boris,gca" - n_payloads = 3 + n_payloads_real = 3 cooling = "synchrotron" [[particles.species]] @@ -149,6 +149,7 @@ const auto sph_2d = u8R"( maxnpart = 1e2 pusher = "boris,gca" cooling = "synchrotron" + n_payloads_int = 2 [[particles.species]] label = "ph" @@ -297,7 +298,9 @@ auto main(int argc, char* argv[]) -> int { assert_equal(species[0].pusher(), PrtlPusher::BORIS, "species[0].pusher"); - assert_equal(species[0].npld(), 3, "species[0].npld"); + assert_equal(species[0].npld_r(), 3, "species[0].npld_r"); + assert_equal(species[0].npld_i(), 1, "species[0].npld_i"); + assert_equal(species[0].use_tracking(), true, "species[0].tracking"); assert_equal(species[1].label(), "p+", "species[1].label"); assert_equal(species[1].mass(), 1.0f, "species[1].mass"); @@ -306,7 +309,7 @@ auto main(int argc, char* argv[]) -> int { assert_equal(species[1].pusher(), PrtlPusher::VAY, "species[1].pusher"); - assert_equal(species[1].npld(), 0, "species[1].npld"); + assert_equal(species[1].npld_r(), 0, "species[1].npld_r"); assert_equal(params_mink_1d.get("setup.myfloat"), (real_t)(1e-2), @@ -417,7 +420,7 @@ auto 
main(int argc, char* argv[]) -> int { PrtlPusher::BORIS, "species[0].pusher"); assert_equal(species[0].use_gca(), true, "species[0].use_gca"); - assert_equal(species[0].npld(), 3, "species[0].npld"); + assert_equal(species[0].npld_r(), 3, "species[0].npld_r"); assert_equal(species[0].cooling(), Cooling::SYNCHROTRON, "species[0].cooling"); @@ -430,10 +433,12 @@ auto main(int argc, char* argv[]) -> int { PrtlPusher::BORIS, "species[1].pusher"); assert_equal(species[1].use_gca(), true, "species[1].use_gca"); - assert_equal(species[1].npld(), 0, "species[1].npld"); + assert_equal(species[1].npld_r(), 0, "species[1].npld_r"); assert_equal(species[1].cooling(), Cooling::SYNCHROTRON, "species[1].cooling"); + assert_equal(species[1].npld_i(), 2, "species[1].npld_i"); + assert_equal(species[1].use_tracking(), false, "species[1].tracking"); assert_equal(species[2].label(), "ph", "species[2].label"); assert_equal(species[2].mass(), 0.0f, "species[2].mass"); @@ -442,7 +447,7 @@ auto main(int argc, char* argv[]) -> int { assert_equal(species[2].pusher(), PrtlPusher::PHOTON, "species[2].pusher"); - assert_equal(species[2].npld(), 0, "species[2].npld"); + assert_equal(species[2].npld_r(), 0, "species[2].npld_r"); } { @@ -551,7 +556,7 @@ auto main(int argc, char* argv[]) -> int { assert_equal(species[0].pusher(), PrtlPusher::BORIS, "species[0].pusher"); - assert_equal(species[0].npld(), 0, "species[0].npld"); + assert_equal(species[0].npld_r(), 0, "species[0].npld_r"); assert_equal(species[1].label(), "e+", "species[1].label"); assert_equal(species[1].mass(), 1.0f, "species[1].mass"); @@ -560,7 +565,7 @@ auto main(int argc, char* argv[]) -> int { assert_equal(species[1].pusher(), PrtlPusher::BORIS, "species[1].pusher"); - assert_equal(species[1].npld(), 0, "species[1].npld"); + assert_equal(species[1].npld_r(), 0, "species[1].npld_r"); } } catch (std::exception& err) { diff --git a/src/framework/tests/particles.cpp b/src/framework/tests/particles.cpp index 6c4c227b5..ab3aa0e04 
100644 --- a/src/framework/tests/particles.cpp +++ b/src/framework/tests/particles.cpp @@ -4,24 +4,33 @@ #include "global.h" #include "utils/error.h" -#include "utils/formatting.h" #include -#include #include -#include template -void testParticles(const int& index, +void testParticles(int index, const std::string& label, - const float& m, - const float& ch, - const std::size_t& maxnpart, + float m, + float ch, + std::size_t maxnpart, const ntt::PrtlPusher& pusher, + bool use_tracking, const ntt::Cooling& cooling, - const unsigned short& npld = 0) { + unsigned short npld_r = 0, + unsigned short npld_i = 0) { using namespace ntt; - auto p = Particles(index, label, m, ch, maxnpart, pusher, false, cooling, npld); + auto p = Particles(index, + label, + m, + ch, + maxnpart, + pusher, + use_tracking, + false, + cooling, + npld_r, + npld_i); raise::ErrorIf(p.index() != index, "Index mismatch", HERE); raise::ErrorIf(p.label() != label, "Label mismatch", HERE); raise::ErrorIf(p.mass() != m, "Mass mismatch", HERE); @@ -46,9 +55,13 @@ void testParticles(const int& index, raise::ErrorIf(p.tag.extent(0) != maxnpart, "tag incorrectly allocated", HERE); raise::ErrorIf(p.weight.extent(0) != maxnpart, "weight incorrectly allocated", HERE); - if (npld > 0) { - raise::ErrorIf(p.pld.extent(0) != maxnpart, "pld incorrectly allocated", HERE); - raise::ErrorIf(p.pld.extent(1) != npld, "pld incorrectly allocated", HERE); + if (npld_r > 0) { + raise::ErrorIf(p.pld_r.extent(0) != maxnpart, "pld_r incorrectly allocated", HERE); + raise::ErrorIf(p.pld_r.extent(1) != npld_r, "pld_r incorrectly allocated", HERE); + } + if (npld_i > 0) { + raise::ErrorIf(p.pld_i.extent(0) != maxnpart, "pld_i incorrectly allocated", HERE); + raise::ErrorIf(p.pld_i.extent(1) != npld_i, "pld_i incorrectly allocated", HERE); } if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { @@ -103,6 +116,7 @@ auto main(int argc, char** argv) -> int { -1.0, 100, PrtlPusher::BORIS, + false, Cooling::SYNCHROTRON); testParticles(2, 
"p+", @@ -110,13 +124,17 @@ auto main(int argc, char** argv) -> int { -1.0, 1000, PrtlPusher::VAY, - Cooling::SYNCHROTRON); + true, + Cooling::SYNCHROTRON, + 2, + 1); testParticles(3, "ph", 0.0, 0.0, 100, PrtlPusher::PHOTON, + false, Cooling::NONE, 5); testParticles(4, @@ -125,15 +143,20 @@ auto main(int argc, char** argv) -> int { 1.0, 100, PrtlPusher::BORIS, - Cooling::NONE); + true, + Cooling::NONE, + 2, + 3); testParticles(5, "e+", 1.0, 1.0, 100, PrtlPusher::BORIS, + false, Cooling::NONE, - 1); + 1, + 2); } catch (const std::exception& e) { std::cerr << "Error: " << e.what() << std::endl; Kokkos::finalize(); diff --git a/src/output/CMakeLists.txt b/src/output/CMakeLists.txt index 1b132fb60..ef20dae56 100644 --- a/src/output/CMakeLists.txt +++ b/src/output/CMakeLists.txt @@ -30,6 +30,8 @@ set(SOURCES ${SRC_DIR}/stats.cpp ${SRC_DIR}/fields.cpp ${SRC_DIR}/utils/interpret_prompt.cpp) if(${output}) list(APPEND SOURCES ${SRC_DIR}/writer.cpp) + list(APPEND SOURCES ${SRC_DIR}/utils/writers.cpp) + list(APPEND SOURCES ${SRC_DIR}/utils/readers.cpp) endif() add_library(ntt_output ${SOURCES}) diff --git a/src/output/tests/writer-nompi.cpp b/src/output/tests/writer-nompi.cpp index 66d834f43..a185b1777 100644 --- a/src/output/tests/writer-nompi.cpp +++ b/src/output/tests/writer-nompi.cpp @@ -18,8 +18,8 @@ using namespace ntt; void cleanup() { namespace fs = std::filesystem; - fs::path tempfile_path { "test.h5" }; - fs::remove(tempfile_path); + fs::path tempfile_path { "test.bp" }; + fs::remove_all(tempfile_path); } #define CEILDIV(a, b) \ @@ -71,7 +71,7 @@ auto main(int argc, char* argv[]) -> int { { // write auto writer = out::Writer(); - writer.init(&adios, "hdf5", "test", false); + writer.init(&adios, "bpfile", "test", false); writer.defineMeshLayout({ nx1, nx2, nx3 }, { 0, 0, 0 }, { nx1, nx2, nx3 }, @@ -100,19 +100,20 @@ auto main(int argc, char* argv[]) -> int { { // read adios2::IO io = adios.DeclareIO("read-test"); - io.SetEngine("hdf5"); - adios2::Engine reader = 
io.Open("test.h5", adios2::Mode::Read); - const auto layoutRight = io.InquireAttribute("LayoutRight").Data()[0] == - 1; - - raise::ErrorIf(io.InquireAttribute("NGhosts").Data()[0] != 0, - "NGhosts is not correct", - HERE); - raise::ErrorIf(io.InquireAttribute("Dimension").Data()[0] != 3, - "Dimension is not correct", - HERE); + io.SetEngine("BPFile"); + adios2::Engine reader = io.Open("test.bp", adios2::Mode::Read); for (auto step = 0u; reader.BeginStep() == adios2::StepStatus::OK; ++step) { + const auto layoutRight = io.InquireAttribute("LayoutRight").Data()[0] == + 1; + + raise::ErrorIf(io.InquireAttribute("NGhosts").Data()[0] != 0, + "NGhosts is not correct", + HERE); + raise::ErrorIf(io.InquireAttribute("Dimension").Data()[0] != 3, + "Dimension is not correct", + HERE); + timestep_t step_read; simtime_t time_read; diff --git a/src/output/utils/attr_writer.h b/src/output/utils/attr_writer.h index c8b21e4c2..41b64fb75 100644 --- a/src/output/utils/attr_writer.h +++ b/src/output/utils/attr_writer.h @@ -7,6 +7,7 @@ * @namespaces: * - out:: */ + #ifndef OUTPUT_UTILS_ATTR_WRITER_H #define OUTPUT_UTILS_ATTR_WRITER_H diff --git a/src/output/utils/interpret_prompt.h b/src/output/utils/interpret_prompt.h index 032482cf8..488d81101 100644 --- a/src/output/utils/interpret_prompt.h +++ b/src/output/utils/interpret_prompt.h @@ -26,8 +26,8 @@ namespace out { auto InterpretSpecies(const std::string&) -> std::vector; - auto InterpretComponents( - const std::vector&) -> std::vector>; + auto InterpretComponents(const std::vector&) + -> std::vector>; } // namespace out diff --git a/src/output/utils/readers.cpp b/src/output/utils/readers.cpp new file mode 100644 index 000000000..d2e866af1 --- /dev/null +++ b/src/output/utils/readers.cpp @@ -0,0 +1,99 @@ +#include "output/utils/readers.h" + +#include "global.h" + +#include "arch/kokkos_aliases.h" +#include "utils/error.h" +#include "utils/formatting.h" + +#include + +#include + +namespace out { + + template + void 
ReadVariable(adios2::IO& io, + adios2::Engine& reader, + const std::string& quantity, + T& data, + std::size_t local_offset) { + auto var = io.InquireVariable(quantity); + if (var) { + var.SetSelection(adios2::Box({ local_offset }, { 1 })); + reader.Get(var, &data, adios2::Mode::Sync); + } else { + raise::Error(fmt::format("Variable: %s not found", quantity.c_str()), HERE); + } + } + + template + void Read1DArray(adios2::IO& io, + adios2::Engine& reader, + const std::string& quantity, + array_t& data, + std::size_t local_size, + std::size_t local_offset) { + auto var = io.InquireVariable(quantity); + if (var) { + var.SetSelection(adios2::Box({ local_offset }, { local_size })); + const auto slice = range_tuple_t(0, local_size); + auto data_h = Kokkos::create_mirror_view(data); + reader.Get(var, Kokkos::subview(data_h, slice).data(), adios2::Mode::Sync); + Kokkos::deep_copy(Kokkos::subview(data, slice), + Kokkos::subview(data_h, slice)); + } else { + raise::Error(fmt::format("Variable: %s not found", quantity.c_str()), HERE); + } + } + + template + void Read2DArray(adios2::IO& io, + adios2::Engine& reader, + const std::string& quantity, + array_t& data, + unsigned short dim2_size, + std::size_t local_size, + std::size_t local_offset) { + auto var = io.InquireVariable(quantity); + if (var) { + var.SetSelection(adios2::Box({ local_offset, 0 }, + { local_size, dim2_size })); + const auto slice = range_tuple_t(0, local_size); + auto data_h = Kokkos::create_mirror_view(data); + reader.Get(var, + Kokkos::subview(data_h, slice, range_tuple_t(0, dim2_size)).data(), + adios2::Mode::Sync); + Kokkos::deep_copy(data, data_h); + } else { + raise::Error(fmt::format("Variable: %s not found", quantity.c_str()), HERE); + } + } + +#define ARRAY_READERS(T) \ + template void ReadVariable(adios2::IO&, \ + adios2::Engine&, \ + const std::string&, \ + T&, \ + std::size_t); \ + template void Read1DArray(adios2::IO&, \ + adios2::Engine&, \ + const std::string&, \ + array_t&, \ + std::size_t, 
\ + std::size_t); \ + template void Read2DArray(adios2::IO&, \ + adios2::Engine&, \ + const std::string&, \ + array_t&, \ + unsigned short, \ + std::size_t, \ + std::size_t); \ + ARRAY_READERS(int) \ + ARRAY_READERS(unsigned int) \ + ARRAY_READERS(unsigned long int) \ + ARRAY_READERS(double) \ + ARRAY_READERS(float) +#undef ARRAY_READERS + +} // namespace out diff --git a/src/output/utils/readers.h b/src/output/utils/readers.h new file mode 100644 index 000000000..b4fdc8ab0 --- /dev/null +++ b/src/output/utils/readers.h @@ -0,0 +1,48 @@ +/** + * @file output/utils/readers.h + * @brief + * Defines generic reader functions. + * @implements + * - out::ReadVariable<> -> void + * - out::Read1DArray<> -> void + * - out::Read2DArray<> -> void + * @cpp: + * - readers.cpp + * @namespaces: + * - out:: + */ + +#ifndef OUTPUT_UTILS_READERS_H +#define OUTPUT_UTILS_READERS_H + +#include "arch/kokkos_aliases.h" + +#include + +#include + +namespace out { + + template + void ReadVariable(adios2::IO&, adios2::Engine&, const std::string&, T&, std::size_t); + + template + void Read1DArray(adios2::IO&, + adios2::Engine&, + const std::string&, + array_t&, + std::size_t, + std::size_t); + + template + void Read2DArray(adios2::IO&, + adios2::Engine&, + const std::string&, + array_t&, + unsigned short, + std::size_t, + std::size_t); + +} // namespace out + +#endif // OUTPUT_UTILS_READERS_H diff --git a/src/output/utils/writers.cpp b/src/output/utils/writers.cpp new file mode 100644 index 000000000..c7d1bbd74 --- /dev/null +++ b/src/output/utils/writers.cpp @@ -0,0 +1,94 @@ +#include "output/utils/writers.h" + +#include "arch/kokkos_aliases.h" + +#include + +#include + +namespace out { + + template + void WriteVariable(adios2::IO& io, + adios2::Engine& writer, + const std::string& name, + const T& data, + std::size_t global_size, + std::size_t local_offset) { + auto var = io.InquireVariable(name); + var.SetShape({ global_size }); + var.SetSelection(adios2::Box({ local_offset }, { 1 })); + 
writer.Put(var, &data); + } + + template + void Write1DArray(adios2::IO& io, + adios2::Engine& writer, + const std::string& name, + const array_t& data, + std::size_t local_size, + std::size_t local_offset, + std::size_t global_size) { + const auto slice = range_tuple_t(0, local_size); + auto var = io.InquireVariable(name); + var.SetShape({ global_size }); + var.SetSelection(adios2::Box({ local_offset }, { local_size })); + + auto data_h = Kokkos::create_mirror_view(data); + Kokkos::deep_copy(data_h, data); + auto data_sub = Kokkos::subview(data_h, slice); + writer.Put(var, data_sub.data(), adios2::Mode::Sync); + } + + template + void Write2DArray(adios2::IO& io, + adios2::Engine& writer, + const std::string& name, + const array_t& data, + unsigned short dim2_size, + std::size_t local_size, + std::size_t local_offset, + std::size_t global_size) { + const auto slice = range_tuple_t(0, local_size); + auto var = io.InquireVariable(name); + + var.SetShape({ global_size, dim2_size }); + var.SetSelection( + adios2::Box({ local_offset, 0 }, { local_size, dim2_size })); + + auto data_h = Kokkos::create_mirror_view(data); + Kokkos::deep_copy(data_h, data); + auto data_sub = Kokkos::subview(data_h, slice, range_tuple_t(0, dim2_size)); + writer.Put(var, data_sub.data(), adios2::Mode::Sync); + } + +#define ARRAY_WRITERS(T) \ + template void WriteVariable(adios2::IO&, \ + adios2::Engine&, \ + const std::string&, \ + const T&, \ + std::size_t, \ + std::size_t); \ + template void Write1DArray(adios2::IO&, \ + adios2::Engine&, \ + const std::string&, \ + const array_t&, \ + std::size_t, \ + std::size_t, \ + std::size_t); \ + template void Write2DArray(adios2::IO&, \ + adios2::Engine&, \ + const std::string&, \ + const array_t&, \ + unsigned short, \ + std::size_t, \ + std::size_t, \ + std::size_t); \ + ARRAY_WRITERS(int) \ + ARRAY_WRITERS(unsigned int) \ + ARRAY_WRITERS(unsigned long int) \ + ARRAY_WRITERS(double) \ + ARRAY_WRITERS(float) +#undef ARRAY_WRITERS + +} // namespace 
out diff --git a/src/output/utils/writers.h b/src/output/utils/writers.h new file mode 100644 index 000000000..3e4bd4cb8 --- /dev/null +++ b/src/output/utils/writers.h @@ -0,0 +1,55 @@ +/** + * @file output/utils/writers.h + * @brief + * Defines generic writer functions. + * @implements + * - out::WriteVariable<> -> void + * - out::Write1DArray<> -> void + * - out::Write2DArray<> -> void + * @cpp: + * - writers.cpp + * @namespaces: + * - out:: + */ + +#ifndef OUTPUT_UTILS_WRITERS_H +#define OUTPUT_UTILS_WRITERS_H + +#include "arch/kokkos_aliases.h" + +#include + +#include + +namespace out { + + template + void WriteVariable(adios2::IO&, + adios2::Engine&, + const std::string&, + const T&, + std::size_t, + std::size_t); + + template + void Write1DArray(adios2::IO&, + adios2::Engine&, + const std::string&, + const array_t&, + std::size_t, + std::size_t, + std::size_t); + + template + void Write2DArray(adios2::IO&, + adios2::Engine&, + const std::string&, + const array_t&, + unsigned short, + std::size_t, + std::size_t, + std::size_t); + +} // namespace out + +#endif // OUTPUT_UTILS_WRITERS_H From 479c0cf49bde984f73bcdd6924b3e945e44cb421 Mon Sep 17 00:00:00 2001 From: haykh Date: Tue, 21 Oct 2025 16:04:53 -0400 Subject: [PATCH 089/154] hdf5 off by default --- cmake/adios2Config.cmake | 2 +- dev/nix/adios2.nix | 11 ++++++++--- src/global/defaults.h | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/cmake/adios2Config.cmake b/cmake/adios2Config.cmake index a4ce46179..0ff3a4e89 100644 --- a/cmake/adios2Config.cmake +++ b/cmake/adios2Config.cmake @@ -13,7 +13,7 @@ set(ADIOS2_USE_Fortran # Format/compression support set(ADIOS2_USE_HDF5 - ON + OFF CACHE BOOL "Use HDF5 for ADIOS2") set(ADIOS2_USE_MPI diff --git a/dev/nix/adios2.nix b/dev/nix/adios2.nix index 0418b71cd..fb574c302 100644 --- a/dev/nix/adios2.nix +++ b/dev/nix/adios2.nix @@ -19,7 +19,7 @@ let BUILD_TESTING = "OFF"; ADIOS2_BUILD_EXAMPLES = "OFF"; ADIOS2_USE_MPI = if mpi then "ON" else "OFF"; - 
ADIOS2_HAVE_HDF5_VOL = if mpi then "ON" else "OFF"; + ADIOS2_HAVE_HDF5_VOL = if (mpi && hdf5) then "ON" else "OFF"; CMAKE_BUILD_TYPE = "Release"; }; stdenv = pkgs.gcc13Stdenv; @@ -40,8 +40,13 @@ stdenv.mkDerivation { propagatedBuildInputs = [ pkgs.gcc13 - ] ++ (if hdf5 then (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5-cpp ]) else [ ]); - # ++ (if mpi then [ pkgs.openmpi ] else [ ]); + ] + ++ ( + if hdf5 then + (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5-cpp ]) + else + (if mpi then [ pkgs.mpi ] else [ ]) + ); configurePhase = '' cmake -B build $src ${ diff --git a/src/global/defaults.h b/src/global/defaults.h index 9513493b1..b7ae6547d 100644 --- a/src/global/defaults.h +++ b/src/global/defaults.h @@ -51,7 +51,7 @@ namespace ntt::defaults { } // namespace bc namespace output { - const std::string format = "hdf5"; + const std::string format = "BPFile"; const timestep_t interval = 100; const unsigned short mom_smooth = 0; const npart_t prtl_stride = 100; From c70c3531e213ae33c37d575073f103a4a6ba0458 Mon Sep 17 00:00:00 2001 From: haykh Date: Tue, 21 Oct 2025 16:07:58 -0400 Subject: [PATCH 090/154] checkpoint functionality for fields placed in fields struct --- src/CMakeLists.txt | 4 - src/checkpoint/CMakeLists.txt | 37 --- src/checkpoint/reader.cpp | 183 ----------- src/checkpoint/reader.h | 57 ---- src/checkpoint/tests/CMakeLists.txt | 30 -- src/checkpoint/tests/checkpoint-mpi.cpp | 273 ----------------- src/checkpoint/tests/checkpoint-nompi.cpp | 248 --------------- src/checkpoint/writer.cpp | 285 ------------------ src/framework/CMakeLists.txt | 7 +- src/framework/containers/fields.h | 16 + src/framework/containers/fields_io.cpp | 96 ++++++ src/framework/containers/particles_io.cpp | 64 ++-- src/framework/domain/checkpoint.cpp | 72 ++--- src/framework/domain/metadomain.h | 2 +- src/framework/parameters.cpp | 22 +- src/framework/parameters.h | 4 + src/output/CMakeLists.txt | 1 + src/output/checkpoint.cpp | 104 +++++++ .../writer.h => output/checkpoint.h} | 
30 +- 19 files changed, 323 insertions(+), 1212 deletions(-) delete mode 100644 src/checkpoint/CMakeLists.txt delete mode 100644 src/checkpoint/reader.cpp delete mode 100644 src/checkpoint/reader.h delete mode 100644 src/checkpoint/tests/CMakeLists.txt delete mode 100644 src/checkpoint/tests/checkpoint-mpi.cpp delete mode 100644 src/checkpoint/tests/checkpoint-nompi.cpp delete mode 100644 src/checkpoint/writer.cpp create mode 100644 src/framework/containers/fields_io.cpp create mode 100644 src/output/checkpoint.cpp rename src/{checkpoint/writer.h => output/checkpoint.h} (65%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5f2afc824..31114c330 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -33,10 +33,6 @@ add_subdirectory(${SRC_DIR}/framework ${CMAKE_CURRENT_BINARY_DIR}/framework) add_subdirectory(${SRC_DIR}/engines ${CMAKE_CURRENT_BINARY_DIR}/engines) add_subdirectory(${SRC_DIR}/output ${CMAKE_CURRENT_BINARY_DIR}/output) -if(${output}) - add_subdirectory(${SRC_DIR}/checkpoint ${CMAKE_CURRENT_BINARY_DIR}/checkpoint) -endif() - set(ENTITY ${PROJECT_NAME}.xc) set(SOURCES ${SRC_DIR}/entity.cpp) diff --git a/src/checkpoint/CMakeLists.txt b/src/checkpoint/CMakeLists.txt deleted file mode 100644 index 096aad690..000000000 --- a/src/checkpoint/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -# cmake-lint: disable=C0103 -# ------------------------------ -# @defines: ntt_checkpoint [STATIC/SHARED] -# -# @sources: -# -# * writer.cpp -# * reader.cpp -# -# @includes: -# -# * ../ -# -# @depends: -# -# * ntt_global [required] -# -# @uses: -# -# * kokkos [required] -# * ADIOS2 [required] -# * mpi [optional] -# ------------------------------ - -set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SOURCES ${SRC_DIR}/writer.cpp ${SRC_DIR}/reader.cpp) -add_library(ntt_checkpoint ${SOURCES}) - -set(libs ntt_global) -add_dependencies(ntt_checkpoint ${libs}) -target_link_libraries(ntt_checkpoint PUBLIC ${libs}) -target_link_libraries(ntt_checkpoint PRIVATE stdc++fs) - 
-target_include_directories( - ntt_checkpoint - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../ - INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../) diff --git a/src/checkpoint/reader.cpp b/src/checkpoint/reader.cpp deleted file mode 100644 index b5d6f0c44..000000000 --- a/src/checkpoint/reader.cpp +++ /dev/null @@ -1,183 +0,0 @@ -#include "checkpoint/reader.h" - -#include "global.h" - -#include "arch/kokkos_aliases.h" -#include "utils/error.h" -#include "utils/formatting.h" -#include "utils/log.h" - -#include -#include - -#if defined(MPI_ENABLED) - #include -#endif - -#include -#include -#include - -namespace checkpoint { - - template - void ReadFields(adios2::IO& io, - adios2::Engine& reader, - const std::string& field, - const adios2::Box& range, - ndfield_t& array) { - logger::Checkpoint(fmt::format("Reading field: %s", field.c_str()), HERE); - auto field_var = io.InquireVariable(field); - if (field_var) { - field_var.SetSelection(range); - - auto array_h = Kokkos::create_mirror_view(array); - reader.Get(field_var, array_h.data(), adios2::Mode::Sync); - Kokkos::deep_copy(array, array_h); - } else { - raise::Error(fmt::format("Field variable: %s not found", field.c_str()), - HERE); - } - } - - auto ReadParticleCount(adios2::IO& io, - adios2::Engine& reader, - spidx_t s, - std::size_t local_dom, - std::size_t ndomains) -> std::pair { - logger::Checkpoint(fmt::format("Reading particle count for: %d", s + 1), HERE); - auto npart_var = io.InquireVariable(fmt::format("s%d_npart", s + 1)); - if (npart_var) { - raise::ErrorIf(npart_var.Shape()[0] != ndomains, - "npart_var.Shape()[0] != ndomains", - HERE); - raise::ErrorIf(npart_var.Shape().size() != 1, - "npart_var.Shape().size() != 1", - HERE); - npart_var.SetSelection(adios2::Box({ local_dom }, { 1 })); - npart_t npart; - reader.Get(npart_var, &npart, adios2::Mode::Sync); - const auto loc_npart = npart; -#if !defined(MPI_ENABLED) - npart_t offset_npart = 0; -#else - std::vector glob_nparts(ndomains); - MPI_Allgather(&loc_npart, - 1, 
- mpi::get_type(), - glob_nparts.data(), - 1, - mpi::get_type(), - MPI_COMM_WORLD); - npart_t offset_npart = 0; - for (auto d { 0u }; d < local_dom; ++d) { - offset_npart += glob_nparts[d]; - } -#endif - return { loc_npart, offset_npart }; - } else { - raise::Error("npart_var is not found", HERE); - return { 0, 0 }; - } - } - - template - void ReadParticleData(adios2::IO& io, - adios2::Engine& reader, - const std::string& quantity, - spidx_t s, - array_t& array, - npart_t count, - npart_t offset) { - logger::Checkpoint( - fmt::format("Reading quantity: s%d_%s", s + 1, quantity.c_str()), - HERE); - auto var = io.InquireVariable( - fmt::format("s%d_%s", s + 1, quantity.c_str())); - if (var) { - var.SetSelection(adios2::Box({ offset }, { count })); - const auto slice = range_tuple_t(0, count); - auto array_h = Kokkos::create_mirror_view(array); - reader.Get(var, Kokkos::subview(array_h, slice).data(), adios2::Mode::Sync); - Kokkos::deep_copy(Kokkos::subview(array, slice), - Kokkos::subview(array_h, slice)); - } else { - raise::Error( - fmt::format("Variable: s%d_%s not found", s + 1, quantity.c_str()), - HERE); - } - } - - template - void ReadParticlePayloads(adios2::IO& io, - adios2::Engine& reader, - const std::string& suffix, - spidx_t s, - array_t& array, - std::size_t nplds, - npart_t count, - npart_t offset) { - logger::Checkpoint( - fmt::format("Reading quantity: s%d_pld_%s", s + 1, suffix.c_str()), - HERE); - auto var = io.InquireVariable( - fmt::format("s%d_pld_%s", s + 1, suffix.c_str())); - if (var) { - var.SetSelection(adios2::Box({ offset, 0 }, { count, nplds })); - const auto slice = range_tuple_t(0, count); - auto array_h = Kokkos::create_mirror_view(array); - reader.Get(var, - Kokkos::subview(array_h, slice, range_tuple_t(0, nplds)).data(), - adios2::Mode::Sync); - Kokkos::deep_copy(array, array_h); - } else { - raise::Error( - fmt::format("Variable: s%d_pld_%s not found", s + 1, suffix.c_str()), - HERE); - } - } - -#define CHECKPOINT_FIELDS(D, N) \ - 
template void ReadFields(adios2::IO&, \ - adios2::Engine&, \ - const std::string&, \ - const adios2::Box&, \ - ndfield_t&); - CHECKPOINT_FIELDS(Dim::_1D, 3) - CHECKPOINT_FIELDS(Dim::_2D, 3) - CHECKPOINT_FIELDS(Dim::_3D, 3) - CHECKPOINT_FIELDS(Dim::_1D, 6) - CHECKPOINT_FIELDS(Dim::_2D, 6) - CHECKPOINT_FIELDS(Dim::_3D, 6) -#undef CHECKPOINT_FIELDS - -#define CHECKPOINT_PARTICLE_DATA(T) \ - template void ReadParticleData(adios2::IO&, \ - adios2::Engine&, \ - const std::string&, \ - spidx_t, \ - array_t&, \ - npart_t, \ - npart_t); - CHECKPOINT_PARTICLE_DATA(int) - CHECKPOINT_PARTICLE_DATA(float) - CHECKPOINT_PARTICLE_DATA(double) - CHECKPOINT_PARTICLE_DATA(short) -#undef CHECKPOINT_PARTICLE_DATA - -#define CHECKPOINT_PARTICLE_PAYLOADS(T) \ - template void ReadParticlePayloads(adios2::IO&, \ - adios2::Engine&, \ - const std::string&, \ - spidx_t, \ - array_t&, \ - std::size_t, \ - npart_t, \ - npart_t); - CHECKPOINT_PARTICLE_PAYLOADS(int) - CHECKPOINT_PARTICLE_PAYLOADS(float) - CHECKPOINT_PARTICLE_PAYLOADS(double) - CHECKPOINT_PARTICLE_PAYLOADS(npart_t) -#undef CHECKPOINT_PARTICLE_DATA - -} // namespace checkpoint diff --git a/src/checkpoint/reader.h b/src/checkpoint/reader.h deleted file mode 100644 index 78cc73eb7..000000000 --- a/src/checkpoint/reader.h +++ /dev/null @@ -1,57 +0,0 @@ -/** - * @file checkpoint/reader.h - * @brief Function for reading field & particle data from checkpoint files - * @implements - * - checkpoint::ReadFields -> void - * - checkpoint::ReadParticleData -> void - * - checkpoint::ReadParticleCount -> std::pair - * @cpp: - * - reader.cpp - * @namespaces: - * - checkpoint:: - */ - -#ifndef CHECKPOINT_READER_H -#define CHECKPOINT_READER_H - -#include "arch/kokkos_aliases.h" - -#include - -#include -#include - -namespace checkpoint { - - template - void ReadFields(adios2::IO&, - adios2::Engine&, - const std::string&, - const adios2::Box&, - ndfield_t&); - - auto ReadParticleCount(adios2::IO&, adios2::Engine&, spidx_t, std::size_t, std::size_t) - 
-> std::pair; - - template - void ReadParticleData(adios2::IO&, - adios2::Engine&, - const std::string&, - spidx_t, - array_t&, - npart_t, - npart_t); - - template - void ReadParticlePayloads(adios2::IO&, - adios2::Engine&, - const std::string&, - spidx_t, - array_t&, - std::size_t, - npart_t, - npart_t); - -} // namespace checkpoint - -#endif // CHECKPOINT_READER_H diff --git a/src/checkpoint/tests/CMakeLists.txt b/src/checkpoint/tests/CMakeLists.txt deleted file mode 100644 index cbfd63aa9..000000000 --- a/src/checkpoint/tests/CMakeLists.txt +++ /dev/null @@ -1,30 +0,0 @@ -# cmake-lint: disable=C0103,C0111 -# ------------------------------ -# @brief: Generates tests for the `ntt_checkpoint` module -# -# @uses: -# -# * kokkos [required] -# * adios2 [required] -# * mpi [optional] -# ------------------------------ - -set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../) - -function(gen_test title) - set(exec test-output-${title}.xc) - set(src ${title}.cpp) - add_executable(${exec} ${src}) - - set(libs ntt_checkpoint ntt_global) - add_dependencies(${exec} ${libs}) - target_link_libraries(${exec} PRIVATE ${libs} stdc++fs) - - add_test(NAME "CHECKPOINT::${title}" COMMAND "${exec}") -endfunction() - -if(NOT ${mpi}) - gen_test(checkpoint-nompi) -else() - gen_test(checkpoint-mpi) -endif() diff --git a/src/checkpoint/tests/checkpoint-mpi.cpp b/src/checkpoint/tests/checkpoint-mpi.cpp deleted file mode 100644 index 2372d81bc..000000000 --- a/src/checkpoint/tests/checkpoint-mpi.cpp +++ /dev/null @@ -1,273 +0,0 @@ -#include "enums.h" -#include "global.h" - -#include "utils/comparators.h" - -#include "checkpoint/reader.h" -#include "checkpoint/writer.h" - -#include -#include -#include -#include - -#include -#include -#include - -using namespace ntt; -using namespace checkpoint; - -void cleanup() { - namespace fs = std::filesystem; - fs::path temp_path { "chck" }; - fs::remove_all(temp_path); -} - -auto main(int argc, char* argv[]) -> int { - Kokkos::initialize(argc, argv); - 
MPI_Init(&argc, &argv); - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - - try { - // assuming 4 ranks - // |------|------| - // | 2 | 3 | - // |------|------| - // | | | - // | 0 | 1 | - // |------|------| - const std::size_t g_nx1 = 20; - const std::size_t g_nx2 = 15; - const std::size_t g_nx1_gh = g_nx1 + 4 * N_GHOSTS; - const std::size_t g_nx2_gh = g_nx2 + 4 * N_GHOSTS; - - const std::size_t l_nx1 = 10; - const std::size_t l_nx2 = (rank < 2) ? 10 : 5; - - const std::size_t l_nx1_gh = l_nx1 + 2 * N_GHOSTS; - const std::size_t l_nx2_gh = l_nx2 + 2 * N_GHOSTS; - - const std::size_t l_corner_x1 = (rank % 2 == 0) ? 0 : l_nx1_gh; - const std::size_t l_corner_x2 = (rank < 2) ? 0 : l_nx2_gh; - - const std::size_t i1min = N_GHOSTS; - const std::size_t i2min = N_GHOSTS; - const std::size_t i1max = l_nx1 + N_GHOSTS; - const std::size_t i2max = l_nx2 + N_GHOSTS; - - const std::size_t npart1 = (rank % 2 + rank) * 23 + 100; - const std::size_t npart2 = (rank % 2 + rank) * 37 + 100; - - std::size_t npart1_offset = 0; - std::size_t npart2_offset = 0; - - std::size_t npart1_globtot = 0; - std::size_t npart2_globtot = 0; - - for (auto r = 0; r < rank - 1; ++r) { - npart1_offset += (r % 2 + r) * 23 + 100; - npart2_offset += (r % 2 + r) * 37 + 100; - } - - for (auto r = 0; r < size; ++r) { - npart1_globtot += (r % 2 + r) * 23 + 100; - npart2_globtot += (r % 2 + r) * 37 + 100; - } - - // init data - ndfield_t field1 { "fld1", l_nx1_gh, l_nx2_gh }; - ndfield_t field2 { "fld2", l_nx1_gh, l_nx2_gh }; - - array_t i1 { "i_1", npart1 }; - array_t u1 { "u_1", npart1 }; - array_t i2 { "i_2", npart2 }; - array_t u2 { "u_2", npart2 }; - array_t plds1 { "plds_1", npart1, 3 }; - - { - // fill data - Kokkos::parallel_for( - "fillFlds", - CreateRangePolicy({ i1min, i2min }, { i1max, i2max }), - Lambda(index_t i1, index_t i2) { - field1(i1, i2, 0) = static_cast(i1 + i2); - field1(i1, i2, 1) = static_cast(i1 * i2); - field1(i1, i2, 2) = 
static_cast(i1 / i2); - field1(i1, i2, 3) = static_cast(i1 - i2); - field1(i1, i2, 4) = static_cast(i2 / i1); - field1(i1, i2, 5) = static_cast(i1); - field2(i1, i2, 0) = static_cast(-(i1 + i2)); - field2(i1, i2, 1) = static_cast(-(i1 * i2)); - field2(i1, i2, 2) = static_cast(-(i1 / i2)); - field2(i1, i2, 3) = static_cast(-(i1 - i2)); - field2(i1, i2, 4) = static_cast(-(i2 / i1)); - field2(i1, i2, 5) = static_cast(-i1); - }); - Kokkos::parallel_for( - "fillPrtl1", - npart1, - Lambda(index_t p) { - u1(p) = static_cast(p); - i1(p) = static_cast(p); - plds1(p, 0) = static_cast(p); - plds1(p, 1) = static_cast(p * p); - plds1(p, 2) = static_cast(p * p * p); - }); - Kokkos::parallel_for( - "fillPrtl2", - npart2, - Lambda(index_t p) { - u2(p) = -static_cast(p); - i2(p) = -static_cast(p); - }); - } - - adios2::ADIOS adios; - const path_t checkpoint_path { "chck" }; - - { - // write checkpoint - Writer writer; - writer.init(&adios, checkpoint_path, 0, 0.0, 1); - - writer.defineFieldVariables(SimEngine::GRPIC, - { g_nx1_gh, g_nx2_gh }, - { l_corner_x1, l_corner_x2 }, - { l_nx1_gh, l_nx2_gh }); - - writer.defineParticleVariables(Coord::Sph, Dim::_2D, 2, { 3, 0 }); - - writer.beginSaving(0, 0.0); - - writer.saveField("em", field1); - writer.saveField("em0", field2); - - writer.savePerDomainVariable("s1_npart", 1, 0, npart1); - writer.savePerDomainVariable("s2_npart", 1, 0, npart2); - - writer.saveParticleQuantity("s1_i1", - npart1_globtot, - npart1_offset, - npart1, - i1); - writer.saveParticleQuantity("s1_ux1", - npart1_globtot, - npart1_offset, - npart1, - u1); - writer.saveParticleQuantity("s2_i1", - npart2_globtot, - npart2_offset, - npart2, - i2); - writer.saveParticleQuantity("s2_ux1", - npart2_globtot, - npart2_offset, - npart2, - u2); - - writer.saveParticlePayloads("s1_plds", - 3, - npart1_globtot, - npart1_offset, - npart1, - plds1); - - writer.endSaving(); - } - - { - // read checkpoint - ndfield_t field1_read { "fld1_read", l_nx1_gh, l_nx2_gh }; - ndfield_t 
field2_read { "fld2_read", l_nx1_gh, l_nx2_gh }; - - array_t i1_read { "i_1", npart1 }; - array_t u1_read { "u_1", npart1 }; - array_t i2_read { "i_2", npart2 }; - array_t u2_read { "u_2", npart2 }; - array_t plds1_read { "plds_1", npart1, 3 }; - - adios2::IO io = adios.DeclareIO("checkpointRead"); - adios2::Engine reader = io.Open(checkpoint_path / "step-00000000.bp", - adios2::Mode::Read); - reader.BeginStep(); - - auto fieldRange = adios2::Box({ l_corner_x1, l_corner_x2, 0 }, - { l_nx1_gh, l_nx2_gh, 6 }); - ReadFields(io, reader, "em", fieldRange, field1_read); - ReadFields(io, reader, "em0", fieldRange, field2_read); - - auto [nprtl1, noff1] = ReadParticleCount(io, reader, 0, rank, size); - auto [nprtl2, noff2] = ReadParticleCount(io, reader, 1, rank, size); - - ReadParticleData(io, reader, "ux1", 0, u1_read, nprtl1, noff1); - ReadParticleData(io, reader, "ux1", 1, u2_read, nprtl2, noff2); - ReadParticleData(io, reader, "i1", 0, i1_read, nprtl1, noff1); - ReadParticleData(io, reader, "i1", 1, i2_read, nprtl2, noff2); - ReadParticlePayloads(io, reader, 0, plds1_read, 3, nprtl1, noff1); - - reader.EndStep(); - reader.Close(); - - // check the validity - Kokkos::parallel_for( - "checkFields", - CreateRangePolicy({ 0, 0 }, { l_nx1_gh, l_nx2_gh }), - Lambda(index_t i1, index_t i2) { - for (int i = 0; i < 6; ++i) { - if (not cmp::AlmostEqual(field1(i1, i2, i), field1_read(i1, i2, i))) { - raise::KernelError(HERE, "Field1 read failed"); - } - if (not cmp::AlmostEqual(field2(i1, i2, i), field2_read(i1, i2, i))) { - raise::KernelError(HERE, "Field2 read failed"); - } - } - }); - - raise::ErrorIf(npart1 != nprtl1, "Particle count 1 mismatch", HERE); - raise::ErrorIf(npart2 != nprtl2, "Particle count 2 mismatch", HERE); - raise::ErrorIf(noff1 != npart1_offset, "Particle offset 1 mismatch", HERE); - raise::ErrorIf(noff2 != npart2_offset, "Particle offset 2 mismatch", HERE); - - Kokkos::parallel_for( - "checkPrtl1", - nprtl1, - Lambda(index_t p) { - if (not 
cmp::AlmostEqual(u1(p), u1_read(p))) { - raise::KernelError(HERE, "u1 read failed"); - } - if (i1(p) != i1_read(p)) { - raise::KernelError(HERE, "i1 read failed"); - } - for (auto l = 0; l < 3; ++l) { - if (not cmp::AlmostEqual(plds1(p, l), plds1_read(p, l))) { - raise::KernelError(HERE, "plds1 read failed"); - } - } - }); - Kokkos::parallel_for( - "checkPrtl2", - nprtl2, - Lambda(index_t p) { - if (not cmp::AlmostEqual(u2(p), u2_read(p))) { - raise::KernelError(HERE, "u2 read failed"); - } - if (i2(p) != i2_read(p)) { - raise::KernelError(HERE, "i2 read failed"); - } - }); - } - - } catch (std::exception& e) { - std::cerr << e.what() << std::endl; - cleanup(); - Kokkos::finalize(); - return 1; - } - cleanup(); - Kokkos::finalize(); - return 0; -} diff --git a/src/checkpoint/tests/checkpoint-nompi.cpp b/src/checkpoint/tests/checkpoint-nompi.cpp deleted file mode 100644 index 7be41ca3f..000000000 --- a/src/checkpoint/tests/checkpoint-nompi.cpp +++ /dev/null @@ -1,248 +0,0 @@ -#include "enums.h" -#include "global.h" - -#include "utils/comparators.h" - -#include "checkpoint/reader.h" -#include "checkpoint/writer.h" - -#include -#include -#include - -#include -#include - -using namespace ntt; -using namespace checkpoint; - -void cleanup() { - namespace fs = std::filesystem; - fs::path temp_path { "chck" }; - fs::remove_all(temp_path); -} - -auto main(int argc, char* argv[]) -> int { - Kokkos::initialize(argc, argv); - - try { - constexpr auto nx1 = 10; - constexpr auto nx1_gh = nx1 + 2 * N_GHOSTS; - constexpr auto nx2 = 13; - constexpr auto nx2_gh = nx2 + 2 * N_GHOSTS; - constexpr auto nx3 = 9; - constexpr auto nx3_gh = nx3 + 2 * N_GHOSTS; - constexpr auto i1min = N_GHOSTS; - constexpr auto i2min = N_GHOSTS; - constexpr auto i3min = N_GHOSTS; - constexpr auto i1max = nx1 + N_GHOSTS; - constexpr auto i2max = nx2 + N_GHOSTS; - constexpr auto i3max = nx3 + N_GHOSTS; - constexpr auto npart1 = 100; - constexpr auto npart2 = 100; - - // init data - ndfield_t field1 { "fld1", 
nx1_gh, nx2_gh, nx3_gh }; - ndfield_t field2 { "fld2", nx1_gh, nx2_gh, nx3_gh }; - - array_t i1 { "i_1", npart1 }; - array_t u1 { "u_1", npart1 }; - array_t i2 { "i_2", npart2 }; - array_t u2 { "u_2", npart2 }; - - array_t pldr_2 { "pldr_2", npart2, 2 }; - - array_t pldi_1 { "pldi_1", npart1, 1 }; - array_t pldi_2 { "pldi_2", npart2, 2 }; - - { - // fill data - Kokkos::parallel_for( - "fillFlds", - CreateRangePolicy({ i1min, i2min, i3min }, - { i1max, i2max, i3max }), - Lambda(index_t i1, index_t i2, index_t i3) { - const auto i1_ = static_cast(i1); - const auto i2_ = static_cast(i2); - const auto i3_ = static_cast(i3); - field1(i1, i2, i3, 0) = i1_ + i2_ + i3_; - field1(i1, i2, i3, 1) = i1_ * i2_ / i3_; - field1(i1, i2, i3, 2) = i1_ / i2_ * i3_; - field1(i1, i2, i3, 3) = i1_ + i2_ - i3_; - field1(i1, i2, i3, 4) = i1_ * i2_ + i3_; - field1(i1, i2, i3, 5) = i1_ / i2_ - i3_; - field2(i1, i2, i3, 0) = -(i1_ + i2_ + i3_); - field2(i1, i2, i3, 1) = -(i1_ * i2_ / i3_); - field2(i1, i2, i3, 2) = -(i1_ / i2_ * i3_); - field2(i1, i2, i3, 3) = -(i1_ + i2_ - i3_); - field2(i1, i2, i3, 4) = -(i1_ * i2_ + i3_); - field2(i1, i2, i3, 5) = -(i1_ / i2_ - i3_); - }); - Kokkos::parallel_for( - "fillPrtl1", - npart1, - Lambda(index_t p) { - u1(p) = static_cast(p); - i1(p) = static_cast(p); - pldi_1(p, 0) = static_cast(p * 10); - }); - Kokkos::parallel_for( - "fillPrtl2", - npart2, - Lambda(index_t p) { - u2(p) = -static_cast(p); - i2(p) = -static_cast(p); - pldr_2(p, 0) = static_cast(p); - pldr_2(p, 1) = static_cast(p * 2); - pldi_2(p, 0) = static_cast(p * 3); - pldi_2(p, 1) = static_cast(p * 4); - }); - } - - adios2::ADIOS adios; - const path_t checkpoint_path { "chck" }; - - { - // write checkpoint - Writer writer {}; - writer.init(&adios, checkpoint_path, 0, 0.0, 1); - - writer.defineFieldVariables(SimEngine::GRPIC, - { nx1_gh, nx2_gh, nx3_gh }, - { 0, 0, 0 }, - { nx1_gh, nx2_gh, nx3_gh }); - writer.defineParticleVariables(Coord::Sph, Dim::_3D, 2, { 0, 2 }, { 1, 2 }); - - 
writer.beginSaving(0, 0.0); - - writer.saveField("em", field1); - writer.saveField("em0", field2); - - writer.savePerDomainVariable("s1_npart", 1, 0, npart1); - writer.savePerDomainVariable("s2_npart", 1, 0, npart2); - - writer.saveParticleQuantity("s1_i1", npart1, 0, npart1, i1); - writer.saveParticleQuantity("s1_ux1", npart1, 0, npart1, u1); - writer.saveParticleQuantity("s2_i1", npart2, 0, npart2, i2); - writer.saveParticleQuantity("s2_ux1", npart2, 0, npart2, u2); - - writer.saveParticlePayloads("s2_pld_r", 2, npart2, 0, npart2, pldr_2); - - writer.saveParticlePayloads("s1_pld_i", 1, npart1, 0, npart1, pldi_1); - writer.saveParticlePayloads("s2_pld_i", 2, npart2, 0, npart2, pldi_2); - - writer.endSaving(); - } - - { - // read checkpoint - ndfield_t field1_read { "fld1_read", nx1_gh, nx2_gh, nx3_gh }; - ndfield_t field2_read { "fld2_read", nx1_gh, nx2_gh, nx3_gh }; - - array_t i1_read { "i_1", npart1 }; - array_t u1_read { "u_1", npart1 }; - array_t i2_read { "i_2", npart2 }; - array_t u2_read { "u_2", npart2 }; - - array_t pldr_2_read { "pldr_2", npart2, 2 }; - - array_t pldi_1_read { "pldi_1", npart1, 1 }; - array_t pldi_2_read { "pldi_2", npart2, 2 }; - - adios2::IO io = adios.DeclareIO("checkpointRead"); - adios2::Engine reader = io.Open(checkpoint_path / "step-00000000.bp", - adios2::Mode::Read); - reader.BeginStep(); - - auto fieldRange = adios2::Box({ 0, 0, 0, 0 }, - { nx1_gh, nx2_gh, nx3_gh, 6 }); - ReadFields(io, reader, "em", fieldRange, field1_read); - ReadFields(io, reader, "em0", fieldRange, field2_read); - - auto [nprtl1, noff1] = ReadParticleCount(io, reader, 0, 0, 1); - auto [nprtl2, noff2] = ReadParticleCount(io, reader, 1, 0, 1); - - ReadParticleData(io, reader, "ux1", 0, u1_read, nprtl1, noff1); - ReadParticleData(io, reader, "ux1", 1, u2_read, nprtl2, noff2); - ReadParticleData(io, reader, "i1", 0, i1_read, nprtl1, noff1); - ReadParticleData(io, reader, "i1", 1, i2_read, nprtl2, noff2); - - ReadParticlePayloads(io, reader, "r", 1, 
pldr_2_read, 2, nprtl2, noff2); - - ReadParticlePayloads(io, reader, "i", 0, pldi_1_read, 1, nprtl1, noff1); - ReadParticlePayloads(io, reader, "i", 1, pldi_2_read, 2, nprtl2, noff2); - - reader.EndStep(); - reader.Close(); - - // check the validity - Kokkos::parallel_for( - "checkFields", - CreateRangePolicy({ 0, 0, 0 }, { nx1_gh, nx2_gh, nx3_gh }), - Lambda(index_t i1, index_t i2, index_t i3) { - for (int i = 0; i < 6; ++i) { - if (not cmp::AlmostEqual(field1(i1, i2, i3, i), - field1_read(i1, i2, i3, i))) { - raise::KernelError(HERE, "Field1 read failed"); - } - if (not cmp::AlmostEqual(field2(i1, i2, i3, i), - field2_read(i1, i2, i3, i))) { - raise::KernelError(HERE, "Field2 read failed"); - } - } - }); - - raise::ErrorIf(npart1 != nprtl1, "Particle count 1 mismatch", HERE); - raise::ErrorIf(npart2 != nprtl2, "Particle count 2 mismatch", HERE); - raise::ErrorIf(noff1 != 0, "Particle offset 1 mismatch", HERE); - raise::ErrorIf(noff2 != 0, "Particle offset 2 mismatch", HERE); - - Kokkos::parallel_for( - "checkPrtl1", - npart1, - Lambda(index_t p) { - if (not cmp::AlmostEqual(u1(p), u1_read(p))) { - raise::KernelError(HERE, "u1 read failed"); - } - if (i1(p) != i1_read(p)) { - raise::KernelError(HERE, "i1 read failed"); - } - if (pldi_1(p, 0) != pldi_1_read(p, 0)) { - raise::KernelError(HERE, "pldi_1 read failed"); - } - }); - Kokkos::parallel_for( - "checkPrtl2", - npart2, - Lambda(index_t p) { - if (not cmp::AlmostEqual(u2(p), u2_read(p))) { - raise::KernelError(HERE, "u2 read failed"); - } - if (i2(p) != i2_read(p)) { - raise::KernelError(HERE, "i2 read failed"); - } - if (not cmp::AlmostEqual(pldr_2(p, 0), pldr_2_read(p, 0))) { - raise::KernelError(HERE, "pldr_2(0) read failed"); - } - if (not cmp::AlmostEqual(pldr_2(p, 1), pldr_2_read(p, 1))) { - raise::KernelError(HERE, "pldr_2(1) read failed"); - } - if (pldi_2(p, 0) != pldi_2_read(p, 0)) { - raise::KernelError(HERE, "pldi_2(0) read failed"); - } - if (pldi_2(p, 1) != pldi_2_read(p, 1)) { - 
raise::KernelError(HERE, "pldi_2(1) read failed"); - } - }); - } - - } catch (std::exception& e) { - std::cerr << e.what() << std::endl; - cleanup(); - Kokkos::finalize(); - return 1; - } - cleanup(); - Kokkos::finalize(); - return 0; -} diff --git a/src/checkpoint/writer.cpp b/src/checkpoint/writer.cpp deleted file mode 100644 index ebb8663ae..000000000 --- a/src/checkpoint/writer.cpp +++ /dev/null @@ -1,285 +0,0 @@ -#include "checkpoint/writer.h" - -#include "global.h" - -#include "arch/kokkos_aliases.h" -#include "utils/error.h" -#include "utils/formatting.h" -#include "utils/log.h" - -#include "framework/parameters.h" - -#include -#include - -#include -#include -#include -#include - -namespace checkpoint { - - void Writer::init(adios2::ADIOS* ptr_adios, - const path_t& checkpoint_root, - timestep_t interval, - simtime_t interval_time, - int keep, - const std::string& walltime) { - m_keep = keep; - m_checkpoint_root = checkpoint_root; - m_enabled = keep != 0; - if (not m_enabled) { - return; - } - m_tracker.init("checkpoint", interval, interval_time, walltime); - p_adios = ptr_adios; - raise::ErrorIf(p_adios == nullptr, "ADIOS pointer is null", HERE); - - m_io = p_adios->DeclareIO("Entity::Checkpoint"); - m_io.SetEngine("BPFile"); - - m_io.DefineVariable("Step"); - m_io.DefineVariable("Time"); - m_io.DefineAttribute("NGhosts", ntt::N_GHOSTS); - - CallOnce( - [](auto&& checkpoint_root) { - if (!std::filesystem::exists(checkpoint_root)) { - std::filesystem::create_directory(checkpoint_root); - } - }, - m_checkpoint_root); - } - - void Writer::defineFieldVariables(const ntt::SimEngine& S, - const std::vector& glob_shape, - const std::vector& loc_corner, - const std::vector& loc_shape) { - auto gs6 = std::vector(glob_shape.begin(), glob_shape.end()); - auto lc6 = std::vector(loc_corner.begin(), loc_corner.end()); - auto ls6 = std::vector(loc_shape.begin(), loc_shape.end()); - gs6.push_back(6); - lc6.push_back(0); - ls6.push_back(6); - - m_io.DefineVariable("em", 
gs6, lc6, ls6); - if (S == ntt::SimEngine::GRPIC) { - m_io.DefineVariable("em0", gs6, lc6, ls6); - auto gs3 = std::vector(glob_shape.begin(), glob_shape.end()); - auto lc3 = std::vector(loc_corner.begin(), loc_corner.end()); - auto ls3 = std::vector(loc_shape.begin(), loc_shape.end()); - gs3.push_back(3); - lc3.push_back(0); - ls3.push_back(3); - m_io.DefineVariable("cur0", gs3, lc3, ls3); - } - } - - // void Writer::defineParticleVariables(const ntt::Coord& C, - // Dimension dim, - // std::size_t nspec, - // const std::vector& npld_r, - // const std::vector& npld_i) { - // raise::ErrorIf( - // npld_r.size() != nspec, - // "Number of real payloads does not match the number of species", - // HERE); - // raise::ErrorIf( - // npld_i.size() != nspec, - // "Number of int payloads does not match the number of species", - // HERE); - // for (auto s { 0u }; s < nspec; ++s) { - // m_io.DefineVariable(fmt::format("s%d_npart", s + 1), - // { adios2::UnknownDim }, - // { adios2::UnknownDim }, - // { adios2::UnknownDim }); - // m_io.DefineVariable(fmt::format("s%d_counter", s + 1), - // { adios2::UnknownDim }, - // { adios2::UnknownDim }, - // { adios2::UnknownDim }); - // - // for (auto d { 0u }; d < dim; ++d) { - // m_io.DefineVariable(fmt::format("s%d_i%d", s + 1, d + 1), - // { adios2::UnknownDim }, - // { adios2::UnknownDim }, - // { adios2::UnknownDim }); - // m_io.DefineVariable(fmt::format("s%d_dx%d", s + 1, d + 1), - // { adios2::UnknownDim }, - // { adios2::UnknownDim }, - // { adios2::UnknownDim }); - // m_io.DefineVariable(fmt::format("s%d_i%d_prev", s + 1, d + 1), - // { adios2::UnknownDim }, - // { adios2::UnknownDim }, - // { adios2::UnknownDim }); - // m_io.DefineVariable(fmt::format("s%d_dx%d_prev", s + 1, d + 1), - // { adios2::UnknownDim }, - // { adios2::UnknownDim }, - // { adios2::UnknownDim }); - // } - // - // if (dim == Dim::_2D and C != ntt::Coord::Cart) { - // m_io.DefineVariable(fmt::format("s%d_phi", s + 1), - // { adios2::UnknownDim }, - // { 
adios2::UnknownDim }, - // { adios2::UnknownDim }); - // } - // - // for (auto d { 0u }; d < 3; ++d) { - // m_io.DefineVariable(fmt::format("s%d_ux%d", s + 1, d + 1), - // { adios2::UnknownDim }, - // { adios2::UnknownDim }, - // { adios2::UnknownDim }); - // } - // - // m_io.DefineVariable(fmt::format("s%d_tag", s + 1), - // { adios2::UnknownDim }, - // { adios2::UnknownDim }, - // { adios2::UnknownDim }); - // m_io.DefineVariable(fmt::format("s%d_weight", s + 1), - // { adios2::UnknownDim }, - // { adios2::UnknownDim }, - // { adios2::UnknownDim }); - // if (npld_r[s] > 0) { - // m_io.DefineVariable(fmt::format("s%d_pld_r", s + 1), - // { adios2::UnknownDim, npld_r[s] }, - // { adios2::UnknownDim, 0 }, - // { adios2::UnknownDim, npld_r[s] }); - // } - // if (npld_i[s] > 0) { - // m_io.DefineVariable(fmt::format("s%d_pld_i", s + 1), - // { adios2::UnknownDim, npld_i[s] }, - // { adios2::UnknownDim, 0 }, - // { adios2::UnknownDim, npld_i[s] }); - // } - // } - // } - - auto Writer::shouldSave(timestep_t step, simtime_t time) -> bool { - return m_enabled and m_tracker.shouldWrite(step, time); - } - - void Writer::beginSaving(timestep_t step, simtime_t time) { - raise::ErrorIf(!m_enabled, "Checkpoint is not enabled", HERE); - raise::ErrorIf(p_adios == nullptr, "ADIOS pointer is null", HERE); - if (m_writing_mode) { - raise::Fatal("Already writing", HERE); - } - m_writing_mode = true; - try { - const auto filename = m_checkpoint_root / fmt::format("step-%08lu.bp", step); - const auto metafilename = m_checkpoint_root / - fmt::format("meta-%08lu.toml", step); - m_writer = m_io.Open(filename, adios2::Mode::Write); - m_written.push_back({ filename, metafilename }); - logger::Checkpoint(fmt::format("Writing checkpoint to %s and %s", - filename.c_str(), - metafilename.c_str()), - HERE); - } catch (std::exception& e) { - raise::Fatal(e.what(), HERE); - } - - m_writer.BeginStep(); - m_writer.Put(m_io.InquireVariable("Step"), &step); - 
m_writer.Put(m_io.InquireVariable("Time"), &time); - } - - void Writer::endSaving() { - raise::ErrorIf(p_adios == nullptr, "ADIOS pointer is null", HERE); - if (!m_writing_mode) { - raise::Fatal("Not writing", HERE); - } - m_writing_mode = false; - m_writer.EndStep(); - m_writer.Close(); - - // optionally remove the oldest checkpoint - CallOnce([&]() { - if (m_keep > 0 and m_written.size() > (std::size_t)m_keep) { - const auto oldest = m_written.front(); - if (std::filesystem::exists(oldest.first) and - std::filesystem::exists(oldest.second)) { - std::filesystem::remove_all(oldest.first); - std::filesystem::remove(oldest.second); - m_written.erase(m_written.begin()); - } else { - raise::Warning("Checkpoint file does not exist for some reason", HERE); - } - } - }); - } - - void Writer::saveAttrs(const ntt::SimulationParams& params, simtime_t time) { - CallOnce([&]() { - std::ofstream metadata; - if (m_written.empty()) { - raise::Fatal("No checkpoint file to save metadata", HERE); - } - metadata.open(m_written.back().second.c_str()); - metadata << "[metadata]\n" - << " time = " << time << "\n\n" - << params.data() << std::endl; - metadata.close(); - }); - } - - template - void Writer::saveField(const std::string& fieldname, - const ndfield_t& field) { - auto field_h = Kokkos::create_mirror_view(field); - Kokkos::deep_copy(field_h, field); - m_writer.Put(m_io.InquireVariable(fieldname), - field_h.data(), - adios2::Mode::Sync); - } - - // template - // void Writer::saveParticleQuantity(const std::string& quantity, - // npart_t glob_total, - // npart_t loc_offset, - // npart_t loc_size, - // const array_t& data) { - // const auto slice = range_tuple_t(0, loc_size); - // auto var = m_io.InquireVariable(quantity); - // - // var.SetShape({ glob_total }); - // var.SetSelection(adios2::Box({ loc_offset }, { loc_size })); - // - // auto data_h = Kokkos::create_mirror_view(data); - // Kokkos::deep_copy(data_h, data); - // auto data_sub = Kokkos::subview(data_h, slice); - // 
m_writer.Put(var, data_sub.data(), adios2::Mode::Sync); - // } - // - // template - // void Writer::saveParticlePayloads(const std::string& quantity, - // std::size_t nplds, - // npart_t glob_total, - // npart_t loc_offset, - // npart_t loc_size, - // const array_t& data) { - // const auto slice = range_tuple_t(0, loc_size); - // auto var = m_io.InquireVariable(quantity); - // - // var.SetShape({ glob_total, nplds }); - // var.SetSelection( - // adios2::Box({ loc_offset, 0 }, { loc_size, nplds })); - // - // auto data_h = Kokkos::create_mirror_view(data); - // Kokkos::deep_copy(data_h, data); - // auto data_sub = Kokkos::subview(data_h, slice, range_tuple_t(0, nplds)); - // m_writer.Put(var, data_sub.data(), adios2::Mode::Sync); - // } - -#define CHECKPOINT_FIELD(D, N) \ - template void Writer::saveField(const std::string&, \ - const ndfield_t&); - CHECKPOINT_FIELD(Dim::_1D, 3) - CHECKPOINT_FIELD(Dim::_1D, 6) - CHECKPOINT_FIELD(Dim::_2D, 3) - CHECKPOINT_FIELD(Dim::_2D, 6) - CHECKPOINT_FIELD(Dim::_3D, 3) - CHECKPOINT_FIELD(Dim::_3D, 6) -#undef CHECKPOINT_FIELD - -} // namespace checkpoint diff --git a/src/framework/CMakeLists.txt b/src/framework/CMakeLists.txt index 07328fff6..014967870 100644 --- a/src/framework/CMakeLists.txt +++ b/src/framework/CMakeLists.txt @@ -47,14 +47,15 @@ set(SOURCES if(${output}) list(APPEND SOURCES ${SRC_DIR}/domain/output.cpp) list(APPEND SOURCES ${SRC_DIR}/domain/checkpoint.cpp) + list(APPEND SOURCES ${SRC_DIR}/containers/fields_io.cpp) list(APPEND SOURCES ${SRC_DIR}/containers/particles_io.cpp) endif() +if(${mpi}) + list(APPEND SOURCES ${SRC_DIR}/containers/particles_comm.cpp) +endif() add_library(ntt_framework ${SOURCES}) set(libs ntt_global ntt_metrics ntt_kernels ntt_output) -if(${output}) - list(APPEND libs ntt_checkpoint) -endif() add_dependencies(ntt_framework ${libs}) target_link_libraries(ntt_framework PUBLIC ${libs}) target_link_libraries(ntt_framework PRIVATE stdc++fs) diff --git a/src/framework/containers/fields.h 
b/src/framework/containers/fields.h index ee9d656d6..4acabf7b4 100644 --- a/src/framework/containers/fields.h +++ b/src/framework/containers/fields.h @@ -21,6 +21,10 @@ #include "arch/kokkos_aliases.h" +#if defined(OUTPUT_ENABLED) + #include +#endif + #include namespace ntt { @@ -161,6 +165,18 @@ namespace ntt { (em_footprint + bckp_footprint + cur_footprint + buff_footprint + aux_footprint + em0_footprint + cur0_footprint); } + +/* helpers ---------------------------------------------------------------- */ +#if defined(OUTPUT_ENABLED) + void CheckpointDeclare(adios2::IO&, + const std::vector&, + const std::vector&, + const std::vector&) const; + void CheckpointRead(adios2::IO&, + adios2::Engine&, + const adios2::Box&); + void CheckpointWrite(adios2::IO&, adios2::Engine&) const; +#endif }; } // namespace ntt diff --git a/src/framework/containers/fields_io.cpp b/src/framework/containers/fields_io.cpp new file mode 100644 index 000000000..7d01451bd --- /dev/null +++ b/src/framework/containers/fields_io.cpp @@ -0,0 +1,96 @@ +#include "enums.h" +#include "global.h" + +#include "utils/log.h" + +#include "framework/containers/fields.h" +#include "output/utils/readers.h" +#include "output/utils/writers.h" + +#include + +#if defined(MPI_ENABLED) + #include +#endif + +#include + +namespace ntt { + + template + void Fields::CheckpointDeclare( + adios2::IO& io, + const std::vector& local_shape, + const std::vector& global_shape, + const std::vector& local_offset) const { + logger::Checkpoint("Declaring fields checkpoint", HERE); + + auto gs6 = std::vector(global_shape.begin(), global_shape.end()); + auto lo6 = std::vector(local_offset.begin(), local_offset.end()); + auto ls6 = std::vector(local_shape.begin(), local_shape.end()); + gs6.push_back(6); + lo6.push_back(0); + ls6.push_back(6); + + io.DefineVariable("em", gs6, lo6, ls6); + if (S == ntt::SimEngine::GRPIC) { + io.DefineVariable("em0", gs6, lo6, ls6); + auto gs3 = std::vector(global_shape.begin(), global_shape.end()); 
+ auto lo3 = std::vector(local_offset.begin(), local_offset.end()); + auto ls3 = std::vector(local_shape.begin(), local_shape.end()); + gs3.push_back(3); + lo3.push_back(0); + ls3.push_back(3); + io.DefineVariable("cur0", gs3, lo3, ls3); + } + } + + template + void Fields::CheckpointRead(adios2::IO& io, + adios2::Engine& reader, + const adios2::Box& range) { + logger::Checkpoint("Reading fields checkpoint", HERE); + + auto range6 = adios2::Box(range.first, range.second); + range6.first.push_back(0); + range6.second.push_back(6); + out::ReadNDField(io, reader, "em", em, range6); + if (S == ntt::SimEngine::GRPIC) { + out::ReadNDField(io, reader, "em0", em0, range6); + auto range3 = adios2::Box(range.first, range.second); + range3.first.push_back(0); + range3.second.push_back(3); + out::ReadNDField(io, reader, "cur0", cur0, range3); + } + } + + template + void Fields::CheckpointWrite(adios2::IO& io, adios2::Engine& writer) const { + logger::Checkpoint("Writing fields checkpoint", HERE); + + out::WriteNDField(io, writer, "em", em); + if (S == ntt::SimEngine::GRPIC) { + out::WriteNDField(io, writer, "em0", em0); + out::WriteNDField(io, writer, "cur0", cur0); + } + } + +#define FIELDS_CHECKPOINTS(D, S) \ + template void Fields::CheckpointDeclare(adios2::IO&, \ + const std::vector&, \ + const std::vector&, \ + const std::vector&) \ + const; \ + template void Fields::CheckpointRead(adios2::IO&, \ + adios2::Engine&, \ + const adios2::Box&); \ + template void Fields::CheckpointWrite(adios2::IO&, adios2::Engine&) const; + + FIELDS_CHECKPOINTS(Dim::_1D, SimEngine::SRPIC) + FIELDS_CHECKPOINTS(Dim::_2D, SimEngine::SRPIC) + FIELDS_CHECKPOINTS(Dim::_3D, SimEngine::SRPIC) + FIELDS_CHECKPOINTS(Dim::_2D, SimEngine::GRPIC) + FIELDS_CHECKPOINTS(Dim::_3D, SimEngine::GRPIC) +#undef FIELDS_CHECKPOINTS + +} // namespace ntt diff --git a/src/framework/containers/particles_io.cpp b/src/framework/containers/particles_io.cpp index 870a6e0ae..70b7f2458 100644 --- 
a/src/framework/containers/particles_io.cpp +++ b/src/framework/containers/particles_io.cpp @@ -22,6 +22,7 @@ namespace ntt { logger::Checkpoint( fmt::format("Declaring particle checkpoint for species #%d", index()), HERE); + io.DefineVariable(fmt::format("s%d_npart", index()), { adios2::UnknownDim }, { adios2::UnknownDim }, @@ -97,24 +98,20 @@ namespace ntt { "Particles already initialized before reading checkpoint", HERE); npart_t npart_offset = 0u; + npart_t npart_read; out::ReadVariable(io, reader, fmt::format("s%d_npart", index()), - m_npart, + npart_read, domains_offset); - - raise::ErrorIf( - npart() > maxnpart(), - fmt::format("npart %d > maxnpart %d after reading checkpoint", - npart(), - maxnpart()), - HERE); + set_npart(npart_read); #if defined(MPI_ENABLED) { + const auto npart_send = npart(); std::vector glob_nparts(domains_total); - MPI_Allgather(&m_npart, + MPI_Allgather(&npart_send, 1, mpi::get_type(), glob_nparts.data(), @@ -324,24 +321,28 @@ namespace ntt { writer, fmt::format("s%d_i1", index()), i1, + npart(), npart_total, npart_offset); out::Write1DArray(io, writer, fmt::format("s%d_dx1", index()), dx1, + npart(), npart_total, npart_offset); out::Write1DArray(io, writer, fmt::format("s%d_i1_prev", index()), i1_prev, + npart(), npart_total, npart_offset); out::Write1DArray(io, writer, fmt::format("s%d_dx1_prev", index()), dx1_prev, + npart(), npart_total, npart_offset); } @@ -351,24 +352,28 @@ namespace ntt { writer, fmt::format("s%d_i2", index()), i2, + npart(), npart_total, npart_offset); out::Write1DArray(io, writer, fmt::format("s%d_dx2", index()), dx2, + npart(), npart_total, npart_offset); out::Write1DArray(io, writer, fmt::format("s%d_i2_prev", index()), i2_prev, + npart(), npart_total, npart_offset); out::Write1DArray(io, writer, fmt::format("s%d_dx2_prev", index()), dx2_prev, + npart(), npart_total, npart_offset); } @@ -378,24 +383,28 @@ namespace ntt { writer, fmt::format("s%d_i3", index()), i3, + npart(), npart_total, npart_offset); 
out::Write1DArray(io, writer, fmt::format("s%d_dx3", index()), dx3, + npart(), npart_total, npart_offset); out::Write1DArray(io, writer, fmt::format("s%d_i3_prev", index()), i3_prev, + npart(), npart_total, npart_offset); out::Write1DArray(io, writer, fmt::format("s%d_dx3_prev", index()), dx3_prev, + npart(), npart_total, npart_offset); } @@ -405,6 +414,7 @@ namespace ntt { writer, fmt::format("s%d_phi", index()), phi, + npart(), npart_total, npart_offset); } @@ -413,30 +423,35 @@ namespace ntt { writer, fmt::format("s%d_ux1", index()), ux1, + npart(), npart_total, npart_offset); out::Write1DArray(io, writer, fmt::format("s%d_ux2", index()), ux2, + npart(), npart_total, npart_offset); out::Write1DArray(io, writer, fmt::format("s%d_ux3", index()), ux3, + npart(), npart_total, npart_offset); out::Write1DArray(io, writer, fmt::format("s%d_tag", index()), tag, + npart(), npart_total, npart_offset); out::Write1DArray(io, writer, fmt::format("s%d_weight", index()), weight, + npart(), npart_total, npart_offset); if (npld_r() > 0) { @@ -444,9 +459,10 @@ namespace ntt { writer, fmt::format("s%d_pld_r", index()), pld_r, + npld_r(), + npart(), npart_total, - npart_offset, - npld_r()); + npart_offset); } if (npld_i() > 0) { @@ -454,29 +470,31 @@ namespace ntt { writer, fmt::format("s%d_pld_i", index()), pld_i, + npld_i(), + npart(), npart_total, - npart_offset, - npld_i()); + npart_offset); } } #define PARTICLES_CHECKPOINTS(D, C) \ - template void Particles::CheckpointDeclare(adios2::ADIOS&) const; \ - template void Particles::CheckpointRead(adios2::ADIOS&, \ + template void Particles::CheckpointDeclare(adios2::IO&) const; \ + template void Particles::CheckpointRead(adios2::IO&, \ adios2::Engine&, \ std::size_t, \ std::size_t); \ template void Particles::CheckpointWrite(adios2::IO&, \ adios2::Engine&, \ std::size_t, \ - std::size_t) const; \ - PARTICLES_CHECKPOINTS(Dim::_1D, Coord::Cart) \ - PARTICLES_CHECKPOINTS(Dim::_2D, Coord::Cart) \ - PARTICLES_CHECKPOINTS(Dim::_3D, 
Coord::Cart) \ - PARTICLES_CHECKPOINTS(Dim::_2D, Coord::Sph) \ - PARTICLES_CHECKPOINTS(Dim::_2D, Coord::QSph) \ - PARTICLES_CHECKPOINTS(Dim::_3D, Coord::Sph) \ - PARTICLES_CHECKPOINTS(Dim::_3D, Coord::QSph) + std::size_t) const; + + PARTICLES_CHECKPOINTS(Dim::_1D, Coord::Cart) + PARTICLES_CHECKPOINTS(Dim::_2D, Coord::Cart) + PARTICLES_CHECKPOINTS(Dim::_3D, Coord::Cart) + PARTICLES_CHECKPOINTS(Dim::_2D, Coord::Sph) + PARTICLES_CHECKPOINTS(Dim::_2D, Coord::Qsph) + PARTICLES_CHECKPOINTS(Dim::_3D, Coord::Sph) + PARTICLES_CHECKPOINTS(Dim::_3D, Coord::Qsph) #undef PARTICLES_CHECKPOINTS } // namespace ntt diff --git a/src/framework/domain/checkpoint.cpp b/src/framework/domain/checkpoint.cpp index 8fcefa989..c0a0fac6a 100644 --- a/src/framework/domain/checkpoint.cpp +++ b/src/framework/domain/checkpoint.cpp @@ -1,3 +1,5 @@ +#include "output/checkpoint.h" + #include "enums.h" #include "global.h" @@ -12,8 +14,6 @@ #include "metrics/qspherical.h" #include "metrics/spherical.h" -#include "checkpoint/reader.h" -#include "checkpoint/writer.h" #include "framework/domain/metadomain.h" #include "framework/parameters.h" @@ -59,11 +59,11 @@ namespace ntt { params.template get("checkpoint.keep"), params.template get("checkpoint.walltime")); if (g_checkpoint_writer.enabled()) { - g_checkpoint_writer.defineFieldVariables(S, - glob_shape_with_ghosts, - off_ncells_with_ghosts, - loc_shape_with_ghosts); - for (auto& species : local_domain->species) { + local_domain->fields.CheckpointDeclare(g_checkpoint_writer.io(), + loc_shape_with_ghosts, + glob_shape_with_ghosts, + off_ncells_with_ghosts); + for (const auto& species : local_domain->species) { species.CheckpointDeclare(g_checkpoint_writer.io()); } } @@ -90,16 +90,17 @@ namespace ntt { logger::Checkpoint("Writing checkpoint", HERE); g_checkpoint_writer.beginSaving(current_step, current_time); { - g_checkpoint_writer.saveAttrs(params, current_time); - g_checkpoint_writer.saveField("em", local_domain->fields.em); - if constexpr (S == 
SimEngine::GRPIC) { - g_checkpoint_writer.saveField("em0", local_domain->fields.em0); - g_checkpoint_writer.saveField("cur0", local_domain->fields.cur0); + if (g_checkpoint_writer.written().empty()) { + raise::Fatal("No checkpoint file to save metadata", HERE); } - std::size_t dom_tot = 1, dom_offset = 0; -#if defined(MPI_ENABLED) - dom_tot = g_mpi_size; - dom_offset = g_mpi_rank; + params.saveTOML(g_checkpoint_writer.written().back().second, current_time); + + local_domain->fields.CheckpointWrite(g_checkpoint_writer.io(), + g_checkpoint_writer.writer()); +#if !defined(MPI_ENABLED) + const std::size_t dom_tot = 1, dom_offset = 0; +#else + const std::size_t dom_tot = g_mpi_size, dom_offset = g_mpi_rank; #endif // MPI_ENABLED for (const auto& species : local_domain->species) { @@ -124,6 +125,7 @@ namespace ntt { fmt::format("step-%08lu.bp", params.template get( "checkpoint.start_step")); + logger::Checkpoint(fmt::format("Reading checkpoint from %s", fname.c_str()), HERE); @@ -137,45 +139,25 @@ namespace ntt { reader.BeginStep(); for (const auto local_domain_idx : l_subdomain_indices()) { - auto& domain = g_subdomains[local_domain_idx]; + auto local_domain = subdomain_ptr(local_domain_idx); + adios2::Box range; for (auto d { 0u }; d < M::Dim; ++d) { - range.first.push_back(domain.offset_ncells()[d] + - 2 * N_GHOSTS * domain.offset_ndomains()[d]); - range.second.push_back(domain.mesh.n_all()[d]); - } - range.first.push_back(0); - range.second.push_back(6); - checkpoint::ReadFields(io, reader, "em", range, domain.fields.em); - if constexpr (S == ntt::SimEngine::GRPIC) { - checkpoint::ReadFields(io, - reader, - "em0", - range, - domain.fields.em0); - adios2::Box range3; - for (auto d { 0u }; d < M::Dim; ++d) { - range3.first.push_back(domain.offset_ncells()[d] + - 2 * N_GHOSTS * domain.offset_ndomains()[d]); - range3.second.push_back(domain.mesh.n_all()[d]); - } - range3.first.push_back(0); - range3.second.push_back(3); - checkpoint::ReadFields(io, - reader, - "cur0", - 
range3, - domain.fields.cur0); + range.first.push_back(local_domain->offset_ncells()[d] + + 2 * N_GHOSTS * local_domain->offset_ndomains()[d]); + range.second.push_back(local_domain->mesh.n_all()[d]); } + local_domain->fields.CheckpointRead(io, reader, range); - for (auto& species : domain.species) { - species.CheckpointRead(io, reader, local_domain_idx, ndomains()); + for (auto& species : local_domain->species) { + species.CheckpointRead(io, reader, ndomains(), local_domain_idx); } } // local subdomain loop reader.EndStep(); reader.Close(); + logger::Checkpoint( fmt::format("Checkpoint reading done from %s", fname.c_str()), HERE); diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 7ddacffb3..a456879b4 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -31,7 +31,7 @@ #endif // MPI_ENABLED #if defined(OUTPUT_ENABLED) - #include "checkpoint/writer.h" + #include "output/checkpoint.h" #include "output/writer.h" #include diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index 27906b6e4..9ed1e1e18 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -23,6 +23,8 @@ #include #endif +#include +#include #include #include #include @@ -31,10 +33,10 @@ namespace ntt { template - auto get_dx0_V0(const std::vector& resolution, - const boundaries_t& extent, - const std::map& params) - -> std::pair { + auto get_dx0_V0( + const std::vector& resolution, + const boundaries_t& extent, + const std::map& params) -> std::pair { const auto metric = M(resolution, extent, params); const auto dx0 = metric.dxMin(); coord_t x_corner { ZERO }; @@ -1031,4 +1033,16 @@ namespace ntt { "Have not defined all the necessary variables", HERE); } + + void SimulationParams::saveTOML(const std::string& path, simtime_t time) const { + CallOnce([&]() { + std::ofstream metadata; + metadata.open(path); + metadata << "[metadata]\n" + << " time = " << time << "\n\n" + << data() << 
std::endl; + metadata.close(); + }); + } + } // namespace ntt diff --git a/src/framework/parameters.h b/src/framework/parameters.h index 723e860db..0af4fa405 100644 --- a/src/framework/parameters.h +++ b/src/framework/parameters.h @@ -20,6 +20,8 @@ #include "utils/param_container.h" #include "utils/toml.h" +#include + namespace ntt { struct SimulationParams : public prm::Parameters { @@ -52,6 +54,8 @@ namespace ntt { raw_data = data; } + void saveTOML(const std::string&, simtime_t) const; + private: toml::value raw_data; }; diff --git a/src/output/CMakeLists.txt b/src/output/CMakeLists.txt index ef20dae56..e15378c62 100644 --- a/src/output/CMakeLists.txt +++ b/src/output/CMakeLists.txt @@ -30,6 +30,7 @@ set(SOURCES ${SRC_DIR}/stats.cpp ${SRC_DIR}/fields.cpp ${SRC_DIR}/utils/interpret_prompt.cpp) if(${output}) list(APPEND SOURCES ${SRC_DIR}/writer.cpp) + list(APPEND SOURCES ${SRC_DIR}/checkpoint.cpp) list(APPEND SOURCES ${SRC_DIR}/utils/writers.cpp) list(APPEND SOURCES ${SRC_DIR}/utils/readers.cpp) endif() diff --git a/src/output/checkpoint.cpp b/src/output/checkpoint.cpp new file mode 100644 index 000000000..4d74ae23e --- /dev/null +++ b/src/output/checkpoint.cpp @@ -0,0 +1,104 @@ +#include "output/checkpoint.h" + +#include "global.h" + +#include "utils/error.h" +#include "utils/formatting.h" +#include "utils/log.h" + +#include +#include + +#include +#include + +namespace checkpoint { + + void Writer::init(adios2::ADIOS* ptr_adios, + const path_t& checkpoint_root, + timestep_t interval, + simtime_t interval_time, + int keep, + const std::string& walltime) { + m_keep = keep; + m_checkpoint_root = checkpoint_root; + m_enabled = keep != 0; + if (not m_enabled) { + return; + } + m_tracker.init("checkpoint", interval, interval_time, walltime); + p_adios = ptr_adios; + raise::ErrorIf(p_adios == nullptr, "ADIOS pointer is null", HERE); + + m_io = p_adios->DeclareIO("Entity::Checkpoint"); + m_io.SetEngine("BPFile"); + + m_io.DefineVariable("Step"); + 
m_io.DefineVariable("Time"); + m_io.DefineAttribute("NGhosts", ntt::N_GHOSTS); + + CallOnce( + [](auto&& checkpoint_root) { + if (!std::filesystem::exists(checkpoint_root)) { + std::filesystem::create_directory(checkpoint_root); + } + }, + m_checkpoint_root); + } + + auto Writer::shouldSave(timestep_t step, simtime_t time) -> bool { + return m_enabled and m_tracker.shouldWrite(step, time); + } + + void Writer::beginSaving(timestep_t step, simtime_t time) { + raise::ErrorIf(!m_enabled, "Checkpoint is not enabled", HERE); + raise::ErrorIf(p_adios == nullptr, "ADIOS pointer is null", HERE); + if (m_writing_mode) { + raise::Fatal("Already writing", HERE); + } + m_writing_mode = true; + try { + const auto filename = m_checkpoint_root / fmt::format("step-%08lu.bp", step); + const auto metafilename = m_checkpoint_root / + fmt::format("meta-%08lu.toml", step); + m_writer = m_io.Open(filename, adios2::Mode::Write); + m_written.push_back({ filename, metafilename }); + logger::Checkpoint(fmt::format("Writing checkpoint to %s and %s", + filename.c_str(), + metafilename.c_str()), + HERE); + } catch (std::exception& e) { + raise::Fatal(e.what(), HERE); + } + + m_writer.BeginStep(); + m_writer.Put(m_io.InquireVariable("Step"), &step); + m_writer.Put(m_io.InquireVariable("Time"), &time); + } + + void Writer::endSaving() { + raise::ErrorIf(p_adios == nullptr, "ADIOS pointer is null", HERE); + if (!m_writing_mode) { + raise::Fatal("Not writing", HERE); + } + m_writing_mode = false; + m_writer.EndStep(); + m_writer.Close(); + + // optionally remove the oldest checkpoint + CallOnce([&]() { + if (m_keep > 0 and m_written.size() > (std::size_t)m_keep) { + const auto oldest = m_written.front(); + if (std::filesystem::exists(oldest.first) and + std::filesystem::exists(oldest.second)) { + std::filesystem::remove_all(oldest.first); + std::filesystem::remove(oldest.second); + m_written.erase(m_written.begin()); + } else { + raise::Warning("Checkpoint file does not exist for some reason", 
HERE); + } + } + }); + } + +} // namespace checkpoint diff --git a/src/checkpoint/writer.h b/src/output/checkpoint.h similarity index 65% rename from src/checkpoint/writer.h rename to src/output/checkpoint.h index 2750e2226..495b97b73 100644 --- a/src/checkpoint/writer.h +++ b/src/output/checkpoint.h @@ -1,24 +1,21 @@ /** - * @file checkpoint/writer.h - * @brief Class that dumps checkpoints + * @file output/checkpoint.h + * @brief Class that handles checkpoint writing * @implements * - checkpoint::Writer * @cpp: - * - writer.cpp + * - checkpoint.cpp * @namespaces: * - checkpoint:: */ -#ifndef CHECKPOINT_WRITER_H -#define CHECKPOINT_WRITER_H +#ifndef OUTPUT_CHECKPOINT_H +#define OUTPUT_CHECKPOINT_H -#include "enums.h" #include "global.h" #include "utils/tools.h" -#include "framework/parameters.h" - #include #include @@ -60,16 +57,6 @@ namespace checkpoint { void beginSaving(timestep_t, simtime_t); void endSaving(); - void saveAttrs(const ntt::SimulationParams&, simtime_t); - - template - void saveField(const std::string&, const ndfield_t&); - - void defineFieldVariables(const ntt::SimEngine&, - const std::vector&, - const std::vector&, - const std::vector&); - [[nodiscard]] auto io() -> adios2::IO& { return m_io; @@ -80,6 +67,11 @@ namespace checkpoint { return m_writer; } + [[nodiscard]] + auto written() const -> const std::vector>& { + return m_written; + } + [[nodiscard]] auto enabled() const -> bool { return m_enabled; @@ -88,4 +80,4 @@ namespace checkpoint { } // namespace checkpoint -#endif // CHECKPOINT_WRITER_H +#endif // OUTPUT_CHECKPOINT_H From 99137f41fbaf78c1c0ccee32a090be87009c8c30 Mon Sep 17 00:00:00 2001 From: haykh Date: Tue, 21 Oct 2025 16:08:46 -0400 Subject: [PATCH 091/154] comm for particles placed in struct + pld comm --- src/engines/engine_printer.cpp | 3 +- src/framework/containers/particles.h | 19 + src/framework/containers/particles_comm.cpp | 394 ++++++++++++++++++++ src/framework/domain/comm_mpi.hpp | 289 -------------- 
src/framework/domain/comm_nompi.hpp | 2 - src/framework/domain/communications.cpp | 91 +---- src/kernels/comm.hpp | 74 ++-- 7 files changed, 485 insertions(+), 387 deletions(-) create mode 100644 src/framework/containers/particles_comm.cpp diff --git a/src/engines/engine_printer.cpp b/src/engines/engine_printer.cpp index 20f5e81ba..cbfde9304 100644 --- a/src/engines/engine_printer.cpp +++ b/src/engines/engine_printer.cpp @@ -400,7 +400,8 @@ namespace ntt { add_param(report, 6, "GCA", "%s", species.use_gca() ? "ON" : "OFF"); } add_param(report, 6, "Cooling", "%s", species.cooling().to_string()); - add_param(report, 6, "# of payloads", "%d", species.npld()); + add_param(report, 6, "# of real-value payloads", "%d", species.npld_r()); + add_param(report, 6, "# of integer-value payloads", "%d", species.npld_i()); } report.pop_back(); }, diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index c85759035..47758b0d9 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -15,6 +15,7 @@ #include "enums.h" #include "global.h" +#include "arch/directions.h" #include "arch/kokkos_aliases.h" #include "utils/error.h" #include "utils/formatting.h" @@ -253,6 +254,24 @@ namespace ntt { */ void SyncHostDevice(); +#if defined(MPI_ENABLED) + /** + * @brief Communicate particles across neighboring meshblocks + * @param dirs_to_comm The directions requiring communication + * @param shifts_in_x1 The coordinate shifts in x1 direction per each communicated particle + * @param shifts_in_x2 The coordinate shifts in x2 direction per each communicated particle + * @param shifts_in_x3 The coordinate shifts in x3 direction per each communicated particle + * @param send_ranks The map of ranks per each send direction + * @param recv_ranks The map of ranks per each recv direction + */ + void Communicate(const dir::dirs_t&, + const array_t&, + const array_t&, + const array_t&, + const dir::map_t&, + const dir::map_t&); 
+#endif + #if defined(OUTPUT_ENABLED) void CheckpointDeclare(adios2::IO&) const; void CheckpointRead(adios2::IO&, adios2::Engine&, std::size_t, std::size_t); diff --git a/src/framework/containers/particles_comm.cpp b/src/framework/containers/particles_comm.cpp new file mode 100644 index 000000000..4d6d67118 --- /dev/null +++ b/src/framework/containers/particles_comm.cpp @@ -0,0 +1,394 @@ +#include "enums.h" +#include "global.h" + +#include "arch/directions.h" +#include "arch/kokkos_aliases.h" +#include "arch/mpi_aliases.h" +#include "arch/mpi_tags.h" +#include "utils/error.h" +#include "utils/formatting.h" +#include "utils/log.h" + +#include "framework/containers/particles.h" + +#include "kernels/comm.hpp" + +#include + +#include +#include + +namespace ntt { + + namespace prtls { + template + void send_recv(array_t& send_arr, + array_t& recv_arr, + int send_rank, + int recv_rank, + npart_t nsend, + npart_t nrecv, + npart_t offset) { +#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) + MPI_Sendrecv(send_arr.data(), + nsend, + mpi::get_type(), + send_rank, + 0, + recv_arr.data() + offset, + nrecv, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); +#else + const auto slice = std::make_pair(offset, offset + nrecv); + + auto send_arr_h = Kokkos::create_mirror_view(send_arr); + auto recv_arr_h = Kokkos::create_mirror_view( + Kokkos::subview(recv_arr, slice)); + Kokkos::deep_copy(send_arr_h, send_arr); + MPI_Sendrecv(send_arr_h.data(), + nsend, + mpi::get_type(), + send_rank, + 0, + recv_arr_h.data(), + nrecv, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + Kokkos::deep_copy(Kokkos::subview(recv_arr, slice), recv_arr_h); +#endif + } + + void send_recv_count(int send_rank, + int recv_rank, + npart_t send_count, + npart_t& recv_count) { + if ((send_rank >= 0) && (recv_rank >= 0)) { + MPI_Sendrecv(&send_count, + 1, + mpi::get_type(), + send_rank, + 0, + &recv_count, + 1, + mpi::get_type(), + recv_rank, + 0, + 
MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } else if (send_rank >= 0) { + MPI_Send(&send_count, 1, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); + } else if (recv_rank >= 0) { + MPI_Recv(&recv_count, + 1, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } else { + raise::Error("ParticleSendRecvCount called with negative ranks", HERE); + } + } + + template + void send(array_t& send_arr, int send_rank, npart_t nsend) { +#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) + MPI_Send(send_arr.data(), nsend, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); +#else + auto send_arr_h = Kokkos::create_mirror_view(send_arr); + Kokkos::deep_copy(send_arr_h, send_arr); + MPI_Send(send_arr_h.data(), nsend, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); +#endif + } + + template + void recv(array_t& recv_arr, int recv_rank, npart_t nrecv, npart_t offset) { +#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) + MPI_Recv(recv_arr.data() + offset, + nrecv, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); +#else + const auto slice = std::make_pair(offset, offset + nrecv); + + auto recv_arr_h = Kokkos::create_mirror_view( + Kokkos::subview(recv_arr, slice)); + MPI_Recv(recv_arr_h.data(), + nrecv, + mpi::get_type(), + recv_rank, + 0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + Kokkos::deep_copy(Kokkos::subview(recv_arr, slice), recv_arr_h); +#endif + } + + template + void communicate(array_t& send_arr, + array_t& recv_arr, + int send_rank, + int recv_rank, + npart_t nsend, + npart_t nrecv, + npart_t offset) { + if (send_rank >= 0 && recv_rank >= 0) { + raise::ErrorIf( + nrecv + offset > recv_arr.extent(0), + "recv_arr is not large enough to hold the received particles", + HERE); + send_recv(send_arr, recv_arr, send_rank, recv_rank, nsend, nrecv, offset); + } else if (send_rank >= 0) { + send(send_arr, send_rank, nsend); + } else if (recv_rank >= 0) { + raise::ErrorIf( + nrecv + offset > recv_arr.extent(0), + "recv_arr is not 
large enough to hold the received particles", + HERE); + recv(recv_arr, recv_rank, nrecv, offset); + } else { + raise::Error("CommunicateParticles called with negative ranks", HERE); + } + } + } // namespace prtls + + template + void Particles::Communicate(const dir::dirs_t& dirs_to_comm, + const array_t& shifts_in_x1, + const array_t& shifts_in_x2, + const array_t& shifts_in_x3, + const dir::map_t& send_ranks, + const dir::map_t& recv_ranks) { + logger::Checkpoint(fmt::format("Communicating species #%d\n", index()), HERE); + + // at this point particles should already be tagged in the pusher + auto [npptag_vec, tag_offsets] = NpartsPerTagAndOffsets(); + const auto npart_dead = npptag_vec[ParticleTag::dead]; + const auto npart_alive = npptag_vec[ParticleTag::alive]; + + // # of particles to receive per each tag (direction) + std::vector npptag_recv_vec(ntags() - 2, 0); + + // total # of received particles from all directions + npart_t npart_recv_tot = 0u; + + // loop dir + for (const auto& direction : dirs_to_comm) { + // tags corresponding to the direction (both send & recv) + const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); + const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); + + // get ranks of send/recv meshblocks + const auto send_rank = send_ranks.at(direction); + const auto recv_rank = recv_ranks.at(direction); + + // record the # of particles to-be-sent + const auto nsend = npptag_vec[tag_send]; + + // request the # of particles to-be-received ... + // ... 
and send the # of particles to-be-sent + npart_t nrecv = 0; + prtls::send_recv_count(send_rank, recv_rank, nsend, nrecv); + npart_recv_tot += nrecv; + npptag_recv_vec[tag_recv - 2] = nrecv; + + raise::ErrorIf((npart() + npart_recv_tot) >= maxnpart(), + "Too many particles to receive (cannot fit into maxptl)", + HERE); + } + + array_t outgoing_indices { "outgoing_indices", npart() - npart_alive }; + // clang-format off + Kokkos::parallel_for( + "PrepareOutgoingPrtls", + rangeActiveParticles(), + kernel::comm::PrepareOutgoingPrtls_kernel( + shifts_in_x1, shifts_in_x2, shifts_in_x3, + outgoing_indices, + npart(), npart_alive, npart_dead, ntags(), + i1, i1_prev, + i2, i2_prev, + i3, i3_prev, + tag, tag_offsets) + ); + // clang-format on + + // number of arrays of each type to send/recv + const unsigned short NREALS = 4 + static_cast( + D == Dim::_2D and C != Coord::Cart); + const unsigned short NINTS = 2 * static_cast(D); + const unsigned short NPRTLDX = 2 * static_cast(D); + const unsigned short NPLDS_R = npld_r(); + const unsigned short NPLDS_I = npld_i(); + + // buffers to store recv data + const auto npart_recv = std::accumulate(npptag_recv_vec.begin(), + npptag_recv_vec.end(), + static_cast(0)); + array_t recv_buff_int { "recv_buff_int", npart_recv * NINTS }; + array_t recv_buff_real { "recv_buff_real", npart_recv * NREALS }; + array_t recv_buff_prtldx { "recv_buff_prtldx", npart_recv * NPRTLDX }; + array_t recv_buff_pld_r; + array_t recv_buff_pld_i; + + if (NPLDS_R > 0) { + recv_buff_pld_r = array_t { "recv_buff_pld_r", npart_recv * NPLDS_R }; + } + if (NPLDS_I > 0) { + recv_buff_pld_i = array_t { "recv_buff_pld_i", + npart_recv * NPLDS_I }; + } + + auto iteration = 0; + auto current_received = 0; + + for (const auto& direction : dirs_to_comm) { + const auto send_rank = send_ranks.at(direction); + const auto recv_rank = recv_ranks.at(direction); + const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); + const auto tag_recv = 
mpi::PrtlSendTag::dir2tag(-direction); + const auto npart_send_in = npptag_vec[tag_send]; + const auto npart_recv_in = npptag_recv_vec[tag_recv - 2]; + if (send_rank < 0 and recv_rank < 0) { + continue; + } + array_t send_buff_int { "send_buff_int", npart_send_in * NINTS }; + array_t send_buff_real { "send_buff_real", npart_send_in * NREALS }; + array_t send_buff_prtldx { "send_buff_prtldx", + npart_send_in * NPRTLDX }; + array_t send_buff_pld_r; + array_t send_buff_pld_i; + if (NPLDS_R > 0) { + send_buff_pld_r = array_t { "send_buff_pld_r", + npart_send_in * NPLDS_R }; + } + if (NPLDS_I > 0) { + send_buff_pld_i = array_t { "send_buff_pld_i", + npart_send_in * NPLDS_I }; + } + + auto tag_offsets_h = Kokkos::create_mirror_view(tag_offsets); + Kokkos::deep_copy(tag_offsets_h, tag_offsets); + + npart_t idx_offset = npart_dead; + if (tag_send > 2) { + idx_offset += tag_offsets_h(tag_send - 3); + } + // clang-format off + Kokkos::parallel_for( + "PopulatePrtlSendBuffer", + npart_send_in, + kernel::comm::PopulatePrtlSendBuffer_kernel( + send_buff_int, send_buff_real, send_buff_prtldx, send_buff_pld_r, send_buff_pld_i, + NINTS, NREALS, NPRTLDX, NPLDS_R, NPLDS_I, idx_offset, + i1, i1_prev, dx1, dx1_prev, + i2, i2_prev, dx2, dx2_prev, + i3, i3_prev, dx3, dx3_prev, + ux1, ux2, ux3, + weight, phi, pld_r, pld_i, tag, + outgoing_indices) + ); + // clang-format on + + const auto recv_offset_int = current_received * NINTS; + const auto recv_offset_real = current_received * NREALS; + const auto recv_offset_prtldx = current_received * NPRTLDX; + const auto recv_offset_pld_r = current_received * NPLDS_R; + const auto recv_offset_pld_i = current_received * NPLDS_I; + + prtls::communicate(send_buff_int, + recv_buff_int, + send_rank, + recv_rank, + npart_send_in * NINTS, + npart_recv_in * NINTS, + recv_offset_int); + prtls::communicate(send_buff_real, + recv_buff_real, + send_rank, + recv_rank, + npart_send_in * NREALS, + npart_recv_in * NREALS, + recv_offset_real); + 
prtls::communicate(send_buff_prtldx, + recv_buff_prtldx, + send_rank, + recv_rank, + npart_send_in * NPRTLDX, + npart_recv_in * NPRTLDX, + recv_offset_prtldx); + if (NPLDS_R > 0) { + prtls::communicate(send_buff_pld_r, + recv_buff_pld_r, + send_rank, + recv_rank, + npart_send_in * NPLDS_R, + npart_recv_in * NPLDS_R, + recv_offset_pld_r); + } + if (NPLDS_I > 0) { + prtls::communicate(send_buff_pld_i, + recv_buff_pld_i, + send_rank, + recv_rank, + npart_send_in * NPLDS_I, + npart_recv_in * NPLDS_I, + recv_offset_pld_i); + } + current_received += npart_recv_in; + iteration++; + + } // end direction loop + + // clang-format off + Kokkos::parallel_for( + "PopulateFromRecvBuffer", + npart_recv, + kernel::comm::ExtractReceivedPrtls_kernel( + recv_buff_int, recv_buff_real, recv_buff_prtldx, recv_buff_pld_r, recv_buff_pld_i, + NINTS, NREALS, NPRTLDX, NPLDS_R, NPLDS_I, + npart(), + i1, i1_prev, dx1, dx1_prev, + i2, i2_prev, dx2, dx2_prev, + i3, i3_prev, dx3, dx3_prev, + ux1, ux2, ux3, + weight, phi, pld_r, pld_i, tag, + outgoing_indices) + ); + // clang-format on + + const auto npart_holes = outgoing_indices.extent(0); + if (npart_recv > npart_holes) { + set_npart(npart() + npart_recv - npart_holes); + } + set_unsorted(); + } + +#define PARTICLES_COMM(D, C) \ + template void Particles::Communicate(const dir::dirs_t&, \ + const array_t&, \ + const array_t&, \ + const array_t&, \ + const dir::map_t&, \ + const dir::map_t&); + + PARTICLES_COMM(Dim::_1D, Coord::Cart) + PARTICLES_COMM(Dim::_2D, Coord::Cart) + PARTICLES_COMM(Dim::_3D, Coord::Cart) + PARTICLES_COMM(Dim::_2D, Coord::Sph) + PARTICLES_COMM(Dim::_2D, Coord::Qsph) + PARTICLES_COMM(Dim::_3D, Coord::Sph) + PARTICLES_COMM(Dim::_3D, Coord::Qsph) +#undef PARTICLES_COMM + +} // namespace ntt diff --git a/src/framework/domain/comm_mpi.hpp b/src/framework/domain/comm_mpi.hpp index e0d0cb4b2..266c844c5 100644 --- a/src/framework/domain/comm_mpi.hpp +++ b/src/framework/domain/comm_mpi.hpp @@ -11,23 +11,15 @@ #ifndef 
FRAMEWORK_DOMAIN_COMM_MPI_HPP #define FRAMEWORK_DOMAIN_COMM_MPI_HPP -#include "enums.h" #include "global.h" -#include "arch/directions.h" #include "arch/kokkos_aliases.h" #include "arch/mpi_aliases.h" -#include "arch/mpi_tags.h" #include "utils/error.h" -#include "framework/containers/particles.h" - -#include "kernels/comm.hpp" - #include #include -#include #include namespace comm { @@ -131,116 +123,6 @@ namespace comm { } // namespace flds - namespace prtls { - template - void send_recv(array_t& send_arr, - array_t& recv_arr, - int send_rank, - int recv_rank, - npart_t nsend, - npart_t nrecv, - npart_t offset) { -#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) - MPI_Sendrecv(send_arr.data(), - nsend, - mpi::get_type(), - send_rank, - 0, - recv_arr.data() + offset, - nrecv, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); -#else - const auto slice = std::make_pair(offset, offset + nrecv); - - auto send_arr_h = Kokkos::create_mirror_view(send_arr); - auto recv_arr_h = Kokkos::create_mirror_view( - Kokkos::subview(recv_arr, slice)); - Kokkos::deep_copy(send_arr_h, send_arr); - MPI_Sendrecv(send_arr_h.data(), - nsend, - mpi::get_type(), - send_rank, - 0, - recv_arr_h.data(), - nrecv, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - Kokkos::deep_copy(Kokkos::subview(recv_arr, slice), recv_arr_h); -#endif - } - - template - void send(array_t& send_arr, int send_rank, npart_t nsend) { -#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) - MPI_Send(send_arr.data(), nsend, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); -#else - auto send_arr_h = Kokkos::create_mirror_view(send_arr); - Kokkos::deep_copy(send_arr_h, send_arr); - MPI_Send(send_arr_h.data(), nsend, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); -#endif - } - - template - void recv(array_t& recv_arr, int recv_rank, npart_t nrecv, npart_t offset) { -#if !defined(DEVICE_ENABLED) || defined(GPU_AWARE_MPI) - MPI_Recv(recv_arr.data() + offset, - 
nrecv, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); -#else - const auto slice = std::make_pair(offset, offset + nrecv); - - auto recv_arr_h = Kokkos::create_mirror_view( - Kokkos::subview(recv_arr, slice)); - MPI_Recv(recv_arr_h.data(), - nrecv, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - Kokkos::deep_copy(Kokkos::subview(recv_arr, slice), recv_arr_h); -#endif - } - - template - void communicate(array_t& send_arr, - array_t& recv_arr, - int send_rank, - int recv_rank, - npart_t nsend, - npart_t nrecv, - npart_t offset) { - if (send_rank >= 0 && recv_rank >= 0) { - raise::ErrorIf( - nrecv + offset > recv_arr.extent(0), - "recv_arr is not large enough to hold the received particles", - HERE); - send_recv(send_arr, recv_arr, send_rank, recv_rank, nsend, nrecv, offset); - } else if (send_rank >= 0) { - send(send_arr, send_rank, nsend); - } else if (recv_rank >= 0) { - raise::ErrorIf( - nrecv + offset > recv_arr.extent(0), - "recv_arr is not large enough to hold the received particles", - HERE); - recv(recv_arr, recv_rank, nrecv, offset); - } else { - raise::Error("CommunicateParticles called with negative ranks", HERE); - } - } - } // namespace prtls - template inline void CommunicateField(unsigned int idx, ndfield_t& fld, @@ -468,177 +350,6 @@ namespace comm { } } - void ParticleSendRecvCount(int send_rank, - int recv_rank, - npart_t send_count, - npart_t& recv_count) { - if ((send_rank >= 0) && (recv_rank >= 0)) { - MPI_Sendrecv(&send_count, - 1, - mpi::get_type(), - send_rank, - 0, - &recv_count, - 1, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } else if (send_rank >= 0) { - MPI_Send(&send_count, 1, mpi::get_type(), send_rank, 0, MPI_COMM_WORLD); - } else if (recv_rank >= 0) { - MPI_Recv(&recv_count, - 1, - mpi::get_type(), - recv_rank, - 0, - MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - } else { - raise::Error("ParticleSendRecvCount called with negative ranks", HERE); - 
} - } - - template - void CommunicateParticles(Particles& species, - const array_t& outgoing_indices, - const array_t& tag_offsets, - const std::vector& npptag_vec, - const std::vector& npptag_recv_vec, - const std::vector& send_ranks, - const std::vector& recv_ranks, - const dir::dirs_t& dirs_to_comm) { - // number of arrays of each type to send/recv - const unsigned short NREALS = 4 + static_cast( - D == Dim::_2D and C != Coord::Cart); - const unsigned short NINTS = 2 * static_cast(D); - const unsigned short NPRTLDX = 2 * static_cast(D); - const unsigned short NPLDS = species.npld(); - - // buffers to store recv data - const auto npart_dead = npptag_vec[ParticleTag::dead]; - const auto npart_recv = std::accumulate(npptag_recv_vec.begin(), - npptag_recv_vec.end(), - static_cast(0)); - array_t recv_buff_int { "recv_buff_int", npart_recv * NINTS }; - array_t recv_buff_real { "recv_buff_real", npart_recv * NREALS }; - array_t recv_buff_prtldx { "recv_buff_prtldx", npart_recv * NPRTLDX }; - array_t recv_buff_pld; - - if (NPLDS > 0) { - recv_buff_pld = array_t { "recv_buff_pld", npart_recv * NPLDS }; - } - - auto iteration = 0; - auto current_received = 0; - - for (const auto& direction : dirs_to_comm) { - const auto send_rank = send_ranks[iteration]; - const auto recv_rank = recv_ranks[iteration]; - const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); - const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); - const auto npart_send_in = npptag_vec[tag_send]; - const auto npart_recv_in = npptag_recv_vec[tag_recv - 2]; - if (send_rank < 0 and recv_rank < 0) { - continue; - } - array_t send_buff_int { "send_buff_int", npart_send_in * NINTS }; - array_t send_buff_real { "send_buff_real", npart_send_in * NREALS }; - array_t send_buff_prtldx { "send_buff_prtldx", - npart_send_in * NPRTLDX }; - array_t send_buff_pld; - if (NPLDS > 0) { - send_buff_pld = array_t { "send_buff_pld", npart_send_in * NPLDS }; - } - - auto tag_offsets_h = 
Kokkos::create_mirror_view(tag_offsets); - Kokkos::deep_copy(tag_offsets_h, tag_offsets); - - npart_t idx_offset = npart_dead; - if (tag_send > 2) { - idx_offset += tag_offsets_h(tag_send - 3); - } - // clang-format off - Kokkos::parallel_for( - "PopulatePrtlSendBuffer", - npart_send_in, - kernel::comm::PopulatePrtlSendBuffer_kernel( - send_buff_int, send_buff_real, send_buff_prtldx, send_buff_pld, - NINTS, NREALS, NPRTLDX, NPLDS, idx_offset, - species.i1, species.i1_prev, species.dx1, species.dx1_prev, - species.i2, species.i2_prev, species.dx2, species.dx2_prev, - species.i3, species.i3_prev, species.dx3, species.dx3_prev, - species.ux1, species.ux2, species.ux3, - species.weight, species.phi, species.pld, species.tag, - outgoing_indices) - ); - // clang-format on - - const auto recv_offset_int = current_received * NINTS; - const auto recv_offset_real = current_received * NREALS; - const auto recv_offset_prtldx = current_received * NPRTLDX; - const auto recv_offset_pld = current_received * NPLDS; - - prtls::communicate(send_buff_int, - recv_buff_int, - send_rank, - recv_rank, - npart_send_in * NINTS, - npart_recv_in * NINTS, - recv_offset_int); - prtls::communicate(send_buff_real, - recv_buff_real, - send_rank, - recv_rank, - npart_send_in * NREALS, - npart_recv_in * NREALS, - recv_offset_real); - prtls::communicate(send_buff_prtldx, - recv_buff_prtldx, - send_rank, - recv_rank, - npart_send_in * NPRTLDX, - npart_recv_in * NPRTLDX, - recv_offset_prtldx); - if (NPLDS > 0) { - prtls::communicate(send_buff_pld, - recv_buff_pld, - send_rank, - recv_rank, - npart_send_in * NPLDS, - npart_recv_in * NPLDS, - recv_offset_pld); - } - current_received += npart_recv_in; - iteration++; - - } // end direction loop - - // clang-format off - Kokkos::parallel_for( - "PopulateFromRecvBuffer", - npart_recv, - kernel::comm::ExtractReceivedPrtls_kernel( - recv_buff_int, recv_buff_real, recv_buff_prtldx, recv_buff_pld, - NINTS, NREALS, NPRTLDX, NPLDS, - species.npart(), - species.i1, 
species.i1_prev, species.dx1, species.dx1_prev, - species.i2, species.i2_prev, species.dx2, species.dx2_prev, - species.i3, species.i3_prev, species.dx3, species.dx3_prev, - species.ux1, species.ux2, species.ux3, - species.weight, species.phi, species.pld, species.tag, - outgoing_indices) - ); - // clang-format on - - const auto npart = species.npart(); - const auto npart_holes = outgoing_indices.extent(0); - if (npart_recv > npart_holes) { - species.set_npart(npart + npart_recv - npart_holes); - } - } - } // namespace comm #endif // FRAMEWORK_DOMAIN_COMM_MPI_HPP diff --git a/src/framework/domain/comm_nompi.hpp b/src/framework/domain/comm_nompi.hpp index b477ac176..d0cc36cbd 100644 --- a/src/framework/domain/comm_nompi.hpp +++ b/src/framework/domain/comm_nompi.hpp @@ -16,8 +16,6 @@ #include "arch/kokkos_aliases.h" #include "utils/error.h" -#include "framework/domain/domain.h" - #include namespace comm { diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/communications.cpp index bf6eb3dd1..59ddbf583 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/communications.cpp @@ -5,7 +5,6 @@ #include "utils/error.h" #include "utils/formatting.h" #include "utils/log.h" -#include "utils/timer.h" #include "metrics/kerr_schild.h" #include "metrics/kerr_schild_0.h" @@ -20,7 +19,6 @@ #include "arch/mpi_tags.h" #include "framework/domain/comm_mpi.hpp" - #include "kernels/comm.hpp" #else #include "framework/domain/comm_nompi.hpp" #endif @@ -587,45 +585,30 @@ namespace ntt { for (auto& species : domain.species) { const auto ntags = species.ntags(); - // at this point particles should already be tagged in the pusher - auto [npptag_vec, tag_offsets] = species.NpartsPerTagAndOffsets(); - const auto npart_dead = npptag_vec[ParticleTag::dead]; - const auto npart_alive = npptag_vec[ParticleTag::alive]; - - const auto npart = species.npart(); - - // # of particles to receive per each tag (direction) - std::vector npptag_recv_vec(ntags - 
2, 0); // coordinate shifts per each direction - array_t shifts_in_x1 { "shifts_in_x1", ntags - 2 }; - array_t shifts_in_x2 { "shifts_in_x2", ntags - 2 }; - array_t shifts_in_x3 { "shifts_in_x3", ntags - 2 }; - auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); - auto shifts_in_x2_h = Kokkos::create_mirror_view(shifts_in_x2); - auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); + array_t shifts_in_x1 { "shifts_in_x1", ntags - 2 }; + array_t shifts_in_x2 { "shifts_in_x2", ntags - 2 }; + array_t shifts_in_x3 { "shifts_in_x3", ntags - 2 }; + auto shifts_in_x1_h = Kokkos::create_mirror_view(shifts_in_x1); + auto shifts_in_x2_h = Kokkos::create_mirror_view(shifts_in_x2); + auto shifts_in_x3_h = Kokkos::create_mirror_view(shifts_in_x3); // all directions requiring communication dir::dirs_t dirs_to_comm; // ranks & indices of meshblock to send/recv from - std::vector send_ranks, send_inds; - std::vector recv_ranks, recv_inds; - - // total # of reaceived particles from all directions - npart_t npart_recv = 0u; + dir::map_t send_ranks; + dir::map_t recv_ranks; for (const auto& direction : dir::Directions::all) { // tags corresponding to the direction (both send & recv) - const auto tag_recv = mpi::PrtlSendTag::dir2tag(-direction); const auto tag_send = mpi::PrtlSendTag::dir2tag(direction); // get indices & ranks of send/recv meshblocks const auto [send_params, - recv_params] = GetSendRecvParams(this, domain, direction, true); - const auto [send_indrank, send_slice] = send_params; - const auto [recv_indrank, recv_slice] = recv_params; - const auto [send_ind, send_rank] = send_indrank; - const auto [recv_ind, recv_rank] = recv_indrank; + recv_params] = GetSendRecvRanks(this, domain, direction); + const auto [send_ind, send_rank] = send_params; + const auto [recv_ind, recv_rank] = recv_params; // skip if no communication is necessary const auto is_sending = (send_rank >= 0); @@ -634,24 +617,8 @@ namespace ntt { continue; } 
dirs_to_comm.push_back(direction); - send_ranks.push_back(send_rank); - recv_ranks.push_back(recv_rank); - send_inds.push_back(send_ind); - recv_inds.push_back(recv_ind); - - // record the # of particles to-be-sent - const auto nsend = npptag_vec[tag_send]; - - // request the # of particles to-be-received ... - // ... and send the # of particles to-be-sent - npart_t nrecv = 0; - comm::ParticleSendRecvCount(send_rank, recv_rank, nsend, nrecv); - npart_recv += nrecv; - npptag_recv_vec[tag_recv - 2] = nrecv; - - raise::ErrorIf((npart + npart_recv) >= species.maxnpart(), - "Too many particles to receive (cannot fit into maxptl)", - HERE); + send_ranks[direction] = send_rank; + recv_ranks[direction] = recv_rank; // if sending, record displacements to apply before // ... tag_send - 2: because we only shift tags > 2 (i.e. no dead/alive) @@ -689,31 +656,13 @@ namespace ntt { Kokkos::deep_copy(shifts_in_x2, shifts_in_x2_h); Kokkos::deep_copy(shifts_in_x3, shifts_in_x3_h); - array_t outgoing_indices { "outgoing_indices", npart - npart_alive }; - // clang-format off - Kokkos::parallel_for( - "PrepareOutgoingPrtls", - species.rangeActiveParticles(), - kernel::comm::PrepareOutgoingPrtls_kernel( - shifts_in_x1, shifts_in_x2, shifts_in_x3, - outgoing_indices, - npart, npart_alive, npart_dead, ntags, - species.i1, species.i1_prev, - species.i2, species.i2_prev, - species.i3, species.i3_prev, - species.tag, tag_offsets) - ); - // clang-format on - - comm::CommunicateParticles(species, - outgoing_indices, - tag_offsets, - npptag_vec, - npptag_recv_vec, - send_ranks, - recv_ranks, - dirs_to_comm); - species.set_unsorted(); + species.Communicate(dirs_to_comm, + shifts_in_x1, + shifts_in_x2, + shifts_in_x3, + send_ranks, + recv_ranks); + } // end species loop #else (void)domain; diff --git a/src/kernels/comm.hpp b/src/kernels/comm.hpp index 60251d8c6..9f2645cdc 100644 --- a/src/kernels/comm.hpp +++ b/src/kernels/comm.hpp @@ -110,15 +110,17 @@ namespace kernel::comm { array_t 
send_buff_int; array_t send_buff_real; array_t send_buff_prtldx; - array_t send_buff_pld; + array_t send_buff_pld_r; + array_t send_buff_pld_i; - const unsigned short NINTS, NREALS, NPRTLDX, NPLDS; + const unsigned short NINTS, NREALS, NPRTLDX, NPLDS_R, NPLDS_I; const npart_t idx_offset; const array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; const array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; const array_t ux1, ux2, ux3, weight, phi; - const array_t pld; + const array_t pld_r; + const array_t pld_i; array_t tag; const array_t outgoing_indices; @@ -126,11 +128,13 @@ namespace kernel::comm { PopulatePrtlSendBuffer_kernel(array_t& send_buff_int, array_t& send_buff_real, array_t& send_buff_prtldx, - array_t& send_buff_pld, + array_t& send_buff_pld_r, + array_t& send_buff_pld_i, unsigned short NINTS, unsigned short NREALS, unsigned short NPRTLDX, - unsigned short NPLDS, + unsigned short NPLDS_R, + unsigned short NPLDS_I, npart_t idx_offset, const array_t& i1, const array_t& i1_prev, @@ -149,17 +153,20 @@ namespace kernel::comm { const array_t& ux3, const array_t& weight, const array_t& phi, - const array_t& pld, + const array_t& pld_r, + const array_t& pld_i, array_t& tag, const array_t& outgoing_indices) : send_buff_int { send_buff_int } , send_buff_real { send_buff_real } , send_buff_prtldx { send_buff_prtldx } - , send_buff_pld { send_buff_pld } + , send_buff_pld_r { send_buff_pld_r } + , send_buff_pld_i { send_buff_pld_i } , NINTS { NINTS } , NREALS { NREALS } , NPRTLDX { NPRTLDX } - , NPLDS { NPLDS } + , NPLDS_R { NPLDS_R } + , NPLDS_I { NPLDS_I } , idx_offset { idx_offset } , i1 { i1 } , i1_prev { i1_prev } @@ -178,7 +185,8 @@ namespace kernel::comm { , ux3 { ux3 } , weight { weight } , phi { phi } - , pld { pld } + , pld_r { pld_r } + , pld_i { pld_i } , tag { tag } , outgoing_indices { outgoing_indices } {} @@ -209,9 +217,14 @@ namespace kernel::comm { if constexpr (D == Dim::_2D and C != Coord::Cart) { send_buff_real(NREALS * p + 4) = phi(idx); } - if 
(NPLDS > 0) { - for (auto l { 0u }; l < NPLDS; ++l) { - send_buff_pld(NPLDS * p + l) = pld(idx, l); + if (NPLDS_R > 0) { + for (auto l { 0u }; l < NPLDS_R; ++l) { + send_buff_pld_r(NPLDS_R * p + l) = pld_r(idx, l); + } + } + if (NPLDS_I > 0) { + for (auto l { 0u }; l < NPLDS_I; ++l) { + send_buff_pld_i(NPLDS_I * p + l) = pld_i(idx, l); } } tag(idx) = ParticleTag::dead; @@ -223,15 +236,17 @@ namespace kernel::comm { const array_t recv_buff_int; const array_t recv_buff_real; const array_t recv_buff_prtldx; - const array_t recv_buff_pld; + const array_t recv_buff_pld_r; + const array_t recv_buff_pld_i; - const unsigned short NINTS, NREALS, NPRTLDX, NPLDS; + const unsigned short NINTS, NREALS, NPRTLDX, NPLDS_R, NPLDS_I; const npart_t npart, npart_holes; array_t i1, i1_prev, i2, i2_prev, i3, i3_prev; array_t dx1, dx1_prev, dx2, dx2_prev, dx3, dx3_prev; array_t ux1, ux2, ux3, weight, phi; - array_t pld; + array_t pld_r; + array_t pld_i; array_t tag; const array_t outgoing_indices; @@ -239,11 +254,13 @@ namespace kernel::comm { ExtractReceivedPrtls_kernel(const array_t& recv_buff_int, const array_t& recv_buff_real, const array_t& recv_buff_prtldx, - const array_t& recv_buff_pld, + const array_t& recv_buff_pld_r, + const array_t& recv_buff_pld_i, unsigned short NINTS, unsigned short NREALS, unsigned short NPRTLDX, - unsigned short NPLDS, + unsigned short NPLDS_R, + unsigned short NPLDS_I, npart_t npart, array_t& i1, array_t& i1_prev, @@ -262,17 +279,20 @@ namespace kernel::comm { array_t& ux3, array_t& weight, array_t& phi, - array_t& pld, + array_t& pld_r, + array_t& pld_i, array_t& tag, const array_t& outgoing_indices) : recv_buff_int { recv_buff_int } , recv_buff_real { recv_buff_real } , recv_buff_prtldx { recv_buff_prtldx } - , recv_buff_pld { recv_buff_pld } + , recv_buff_pld_r { recv_buff_pld_r } + , recv_buff_pld_i { recv_buff_pld_i } , NINTS { NINTS } , NREALS { NREALS } , NPRTLDX { NPRTLDX } - , NPLDS { NPLDS } + , NPLDS_R { NPLDS_R } + , NPLDS_I { NPLDS_I } , 
npart { npart } , npart_holes { outgoing_indices.extent(0) } , i1 { i1 } @@ -292,7 +312,8 @@ namespace kernel::comm { , ux3 { ux3 } , weight { weight } , phi { phi } - , pld { pld } + , pld_r { pld_r } + , pld_i { pld_i } , tag { tag } , outgoing_indices { outgoing_indices } {} @@ -328,9 +349,14 @@ namespace kernel::comm { if constexpr (D == Dim::_2D and C != Coord::Cart) { phi(idx) = recv_buff_real(NREALS * p + 4); } - if (NPLDS > 0) { - for (auto l { 0u }; l < NPLDS; ++l) { - pld(idx, l) = recv_buff_pld(NPLDS * p + l); + if (NPLDS_R > 0) { + for (auto l { 0u }; l < NPLDS_R; ++l) { + pld_r(idx, l) = recv_buff_pld_r(NPLDS_R * p + l); + } + } + if (NPLDS_I > 0) { + for (auto l { 0u }; l < NPLDS_I; ++l) { + pld_i(idx, l) = recv_buff_pld_i(NPLDS_I * p + l); } } tag(idx) = ParticleTag::alive; From 53bff3c6cdb9b32983edf27f36dbd38638a4aacc Mon Sep 17 00:00:00 2001 From: haykh Date: Tue, 21 Oct 2025 16:08:58 -0400 Subject: [PATCH 092/154] generics for write ndfield --- src/output/utils/readers.cpp | 49 +++++++++++++++++++++++++++++++----- src/output/utils/readers.h | 8 ++++++ src/output/utils/writers.cpp | 46 +++++++++++++++++++++++++-------- src/output/utils/writers.h | 7 ++++++ 4 files changed, 94 insertions(+), 16 deletions(-) diff --git a/src/output/utils/readers.cpp b/src/output/utils/readers.cpp index d2e866af1..53746dcec 100644 --- a/src/output/utils/readers.cpp +++ b/src/output/utils/readers.cpp @@ -20,8 +20,10 @@ namespace out { std::size_t local_offset) { auto var = io.InquireVariable(quantity); if (var) { + T read_data; var.SetSelection(adios2::Box({ local_offset }, { 1 })); - reader.Get(var, &data, adios2::Mode::Sync); + reader.Get(var, &read_data, adios2::Mode::Sync); + data = read_data; } else { raise::Error(fmt::format("Variable: %s not found", quantity.c_str()), HERE); } @@ -70,6 +72,24 @@ namespace out { } } + template + void ReadNDField(adios2::IO& io, + adios2::Engine& reader, + const std::string& quantity, + ndfield_t& data, + const adios2::Box& range) 
{ + auto var = io.InquireVariable(quantity); + if (var) { + var.SetSelection(range); + + auto data_h = Kokkos::create_mirror_view(data); + reader.Get(var, data_h.data(), adios2::Mode::Sync); + Kokkos::deep_copy(data, data_h); + } else { + raise::Error(fmt::format("Variable: %s not found", quantity.c_str()), HERE); + } + } + #define ARRAY_READERS(T) \ template void ReadVariable(adios2::IO&, \ adios2::Engine&, \ @@ -88,12 +108,29 @@ namespace out { array_t&, \ unsigned short, \ std::size_t, \ - std::size_t); \ - ARRAY_READERS(int) \ - ARRAY_READERS(unsigned int) \ - ARRAY_READERS(unsigned long int) \ - ARRAY_READERS(double) \ + std::size_t); + + ARRAY_READERS(short) + ARRAY_READERS(unsigned short) + ARRAY_READERS(int) + ARRAY_READERS(unsigned int) + ARRAY_READERS(unsigned long int) + ARRAY_READERS(double) ARRAY_READERS(float) #undef ARRAY_READERS +#define NDFIELD_READERS(D, N) \ + template void ReadNDField(adios2::IO&, \ + adios2::Engine&, \ + const std::string&, \ + ndfield_t&, \ + const adios2::Box&); + NDFIELD_READERS(Dim::_1D, 3) + NDFIELD_READERS(Dim::_1D, 6) + NDFIELD_READERS(Dim::_2D, 3) + NDFIELD_READERS(Dim::_2D, 6) + NDFIELD_READERS(Dim::_3D, 3) + NDFIELD_READERS(Dim::_3D, 6) +#undef NDFIELD_READERS + } // namespace out diff --git a/src/output/utils/readers.h b/src/output/utils/readers.h index b4fdc8ab0..c62489282 100644 --- a/src/output/utils/readers.h +++ b/src/output/utils/readers.h @@ -6,6 +6,7 @@ * - out::ReadVariable<> -> void * - out::Read1DArray<> -> void * - out::Read2DArray<> -> void + * - out::ReadNDField<> -> void * @cpp: * - readers.cpp * @namespaces: @@ -43,6 +44,13 @@ namespace out { std::size_t, std::size_t); + template + void ReadNDField(adios2::IO&, + adios2::Engine&, + const std::string&, + ndfield_t&, + const adios2::Box&); + } // namespace out #endif // OUTPUT_UTILS_READERS_H diff --git a/src/output/utils/writers.cpp b/src/output/utils/writers.cpp index c7d1bbd74..b16ca9e8d 100644 --- a/src/output/utils/writers.cpp +++ 
b/src/output/utils/writers.cpp @@ -15,7 +15,7 @@ namespace out { const T& data, std::size_t global_size, std::size_t local_offset) { - auto var = io.InquireVariable(name); + auto var = io.InquireVariable(name); var.SetShape({ global_size }); var.SetSelection(adios2::Box({ local_offset }, { 1 })); writer.Put(var, &data); @@ -27,8 +27,8 @@ namespace out { const std::string& name, const array_t& data, std::size_t local_size, - std::size_t local_offset, - std::size_t global_size) { + std::size_t global_size, + std::size_t local_offset) { const auto slice = range_tuple_t(0, local_size); auto var = io.InquireVariable(name); var.SetShape({ global_size }); @@ -47,8 +47,8 @@ namespace out { const array_t& data, unsigned short dim2_size, std::size_t local_size, - std::size_t local_offset, - std::size_t global_size) { + std::size_t global_size, + std::size_t local_offset) { const auto slice = range_tuple_t(0, local_size); auto var = io.InquireVariable(name); @@ -62,6 +62,16 @@ namespace out { writer.Put(var, data_sub.data(), adios2::Mode::Sync); } + template + void WriteNDField(adios2::IO& io, + adios2::Engine& writer, + const std::string& name, + const ndfield_t& data) { + auto data_h = Kokkos::create_mirror_view(data); + Kokkos::deep_copy(data_h, data); + writer.Put(io.InquireVariable(name), data_h.data(), adios2::Mode::Sync); + } + #define ARRAY_WRITERS(T) \ template void WriteVariable(adios2::IO&, \ adios2::Engine&, \ @@ -83,12 +93,28 @@ namespace out { unsigned short, \ std::size_t, \ std::size_t, \ - std::size_t); \ - ARRAY_WRITERS(int) \ - ARRAY_WRITERS(unsigned int) \ - ARRAY_WRITERS(unsigned long int) \ - ARRAY_WRITERS(double) \ + std::size_t); + + ARRAY_WRITERS(short) + ARRAY_WRITERS(unsigned short) + ARRAY_WRITERS(int) + ARRAY_WRITERS(unsigned int) + ARRAY_WRITERS(unsigned long int) + ARRAY_WRITERS(double) ARRAY_WRITERS(float) #undef ARRAY_WRITERS +#define NDFIELD_WRITERS(D, N) \ + template void WriteNDField(adios2::IO&, \ + adios2::Engine&, \ + const std::string&, 
\ + const ndfield_t&); + NDFIELD_WRITERS(Dim::_1D, 3) + NDFIELD_WRITERS(Dim::_1D, 6) + NDFIELD_WRITERS(Dim::_2D, 3) + NDFIELD_WRITERS(Dim::_2D, 6) + NDFIELD_WRITERS(Dim::_3D, 3) + NDFIELD_WRITERS(Dim::_3D, 6) +#undef NDFIELD_WRITERS + } // namespace out diff --git a/src/output/utils/writers.h b/src/output/utils/writers.h index 3e4bd4cb8..58fac2bf6 100644 --- a/src/output/utils/writers.h +++ b/src/output/utils/writers.h @@ -6,6 +6,7 @@ * - out::WriteVariable<> -> void * - out::Write1DArray<> -> void * - out::Write2DArray<> -> void + * - out::WriteNDField<> -> void * @cpp: * - writers.cpp * @namespaces: @@ -50,6 +51,12 @@ namespace out { std::size_t, std::size_t); + template + void WriteNDField(adios2::IO&, + adios2::Engine&, + const std::string&, + const ndfield_t&); + } // namespace out #endif // OUTPUT_UTILS_WRITERS_H From ce4e96f9117fe818188d549550f22f7c39b3a93b Mon Sep 17 00:00:00 2001 From: haykh Date: Tue, 21 Oct 2025 16:09:07 -0400 Subject: [PATCH 093/154] minor (rm ntt_checkpoint) --- cmake/benchmark.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/benchmark.cmake b/cmake/benchmark.cmake index 39b075716..fdd8438ea 100644 --- a/cmake/benchmark.cmake +++ b/cmake/benchmark.cmake @@ -20,7 +20,7 @@ add_executable(${exec} ${src}) set(libs ntt_global ntt_metrics ntt_kernels ntt_archetypes ntt_framework) if(${output}) - list(APPEND libs ntt_output ntt_checkpoint) + list(APPEND libs ntt_output) endif() add_dependencies(${exec} ${libs}) target_link_libraries(${exec} PRIVATE ${libs} stdc++fs) From 4adee23d9e00579eb4ecd10a70302f53a8b01ef7 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 22 Oct 2025 16:21:36 -0500 Subject: [PATCH 094/154] temporary bugfix --- src/framework/parameters.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/framework/parameters.cpp b/src/framework/parameters.cpp index a14aee999..7a35f1c80 100644 --- a/src/framework/parameters.cpp +++ b/src/framework/parameters.cpp @@ -416,6 +416,9 @@ namespace ntt { 
toml::find_or(toml_data, "algorithms", "deposit", "order", 1)); /* [algorithms.fieldsolver] --------------------------------------------- */ + set("algorithms.fieldsolver.enable", + toml::find_or(toml_data, "algorithms", "fieldsolver", "enable", true)); + set("algorithms.fieldsolver.delta_x", toml::find_or(toml_data, "algorithms", From 3848cc3e7b0ec61eca3870d8186d7604b6f1f116 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 22 Oct 2025 18:41:32 -0500 Subject: [PATCH 095/154] Esirkepov 1D --- src/kernels/currents_deposit.hpp | 62 ++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 3 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 18955e795..88d6edc00 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -420,8 +420,64 @@ namespace kernel { fS_x1); if constexpr (D == Dim::_1D) { - // ToDo - raise::KernelNotImplementedError(HERE); + // define weight vectors + real_t Wx1[O + 2]; + real_t Wx23[O + 2]; + + // first separate + Wx1[0] = fS_x1[0]; + Wx23[0] = HALF * fS_x1[0]; + // last separate + Wx1[O + 1] = -iS_x1[O + 1]; + Wx23[O + 1] = HALF * iS_x1[O + 1]; + + // Calculate weight function +#pragma unroll + for (int i = 1; i < O + 1; ++i) { + // Esirkepov 2001, Eq.
38 for 1D case + Wx1[i] = fS_x1[i] - iS_x1[i - 1]; + Wx23[i] = HALF * (fS_x1[i] + iS_x1[i - 1]); + } + + // contribution within the shape function stencil + real_t jx1[O + 2]; + + // prefactors for j update + const real_t Qdx1dt = coeff * inv_dt; + const real_t QVx2 = coeff * vp[1]; + const real_t QVx3 = coeff * vp[2]; + + // Calculate current contribution + jx1[0] = -Qdx1dt * Wx1[0]; +#pragma unroll + for (int i = 1; i < O + 2; ++i) { + jx1[i] = jx1[i - 1] - Qdx1dt * Wx1[i]; + } + + // account for ghost cells + i1_min += N_GHOSTS; + i1_max += N_GHOSTS; + + // get number of update indices for asymmetric movement + const int di_x1 = i1_max - i1_min; + + /* + Current update + */ + auto J_acc = J.access(); + + for (int i = 0; i < di_x1; ++i) { + J_acc(i1_min + i, cur::jx1) += jx1[i]; + } + + for (int i = 0; i <= di_x1; ++i) { + J_acc(i1_min + i, cur::jx2) += QVx2 * Wx23[i]; + } + + for (int i = 0; i <= di_x1; ++i) { + J_acc(i1_min + i, cur::jx3) += QVx3 * Wx23[i]; + } + } else if constexpr (D == Dim::_2D) { // shape function in dim1 -> always required @@ -607,7 +663,7 @@ namespace kernel { jx1[0][j][k] = -Qdxdt * Wx1[0][j][k]; } } - + #pragma unroll for (int i = 1; i < O + 2; ++i) { #pragma unroll From 662b7f7677d684386c7e1af926977f03ccd3c942 Mon Sep 17 00:00:00 2001 From: hayk Date: Fri, 24 Oct 2025 15:49:36 -0400 Subject: [PATCH 096/154] experimental removed (WIP) --- src/archetypes/energy_dist.h | 309 ++++++----------- src/archetypes/particle_injector.h | 514 ++++++++++++++--------------- src/archetypes/utils.h | 39 +-- src/kernels/injectors.hpp | 387 ++++++++++------------ 4 files changed, 541 insertions(+), 708 deletions(-) diff --git a/src/archetypes/energy_dist.h b/src/archetypes/energy_dist.h index bcb88fbc3..f8ef1175a 100644 --- a/src/archetypes/energy_dist.h +++ b/src/archetypes/energy_dist.h @@ -208,228 +208,125 @@ namespace arch { struct Maxwellian : public EnergyDistribution { using EnergyDistribution::metric; - Maxwellian(const M& metric, - 
random_number_pool_t& pool, - real_t temperature, - real_t boost_vel = ZERO, - in boost_direction = in::x1, - bool zero_current = true) + Maxwellian(const M& metric, + random_number_pool_t& pool, + real_t temperature, + const std::vector& drift_four_vel = { ZERO, ZERO, ZERO }) : EnergyDistribution { metric } , pool { pool } - , temperature { temperature } - , boost_velocity { boost_vel } - , boost_direction { boost_direction } - , zero_current { zero_current } { + , temperature { temperature } { + raise::ErrorIf(drift_four_vel.size() != 3, + "Maxwellian: Drift velocity must be a 3D vector", + HERE); raise::ErrorIf(temperature < ZERO, "Maxwellian: Temperature must be non-negative", HERE); - raise::ErrorIf( - (not cmp::AlmostZero_host(boost_vel, ZERO)) && (M::CoordType != Coord::Cart), - "Maxwellian: Boosting is only supported in Cartesian coordinates", - HERE); - } - - Inline void operator()(const coord_t&, - vec_t& v, - spidx_t sp = 0) const { - SampleFromMaxwellian(v, - pool, - temperature, - boost_velocity, - boost_direction, - not zero_current and - sp % 2 == 0); - } - - private: - random_number_pool_t pool; - - const real_t temperature; - const real_t boost_velocity; - const in boost_direction; - const bool zero_current; - }; - - template - struct TwoTemperatureMaxwellian : public EnergyDistribution { - using EnergyDistribution::metric; - - TwoTemperatureMaxwellian(const M& metric, - random_number_pool_t& pool, - const std::pair& temperatures, - const std::pair& species, - real_t boost_vel = ZERO, - in boost_direction = in::x1, - bool zero_current = true) - : EnergyDistribution { metric } - , pool { pool } - , temperature_1 { temperatures.first } - , temperature_2 { temperatures.second } - , sp_1 { species.first } - , sp_2 { species.second } - , boost_velocity { boost_vel } - , boost_direction { boost_direction } - , zero_current { zero_current } { - raise::ErrorIf( - (temperature_1 < ZERO) or (temperature_2 < ZERO), - "TwoTemperatureMaxwellian: Temperature 
must be non-negative", - HERE); - raise::ErrorIf((not cmp::AlmostZero(boost_vel, ZERO)) && - (M::CoordType != Coord::Cart), - "TwoTemperatureMaxwellian: Boosting is only supported in " - "Cartesian coordinates", - HERE); - } - - Inline void operator()(const coord_t&, - vec_t& v, - spidx_t sp = 0) const { - SampleFromMaxwellian( - v, - pool, - (sp == sp_1) ? temperature_1 : temperature_2, - boost_velocity, - boost_direction, - not zero_current and sp == sp_1); - } - - private: - random_number_pool_t pool; - - const real_t temperature_1, temperature_2; - const spidx_t sp_1, sp_2; - const real_t boost_velocity; - const in boost_direction; - const bool zero_current; - }; - - namespace experimental { - - template - struct Maxwellian : public EnergyDistribution { - using EnergyDistribution::metric; - - Maxwellian(const M& metric, - random_number_pool_t& pool, - real_t temperature, - const std::vector& drift_four_vel = { ZERO, ZERO, ZERO }) - : EnergyDistribution { metric } - , pool { pool } - , temperature { temperature } { - raise::ErrorIf(drift_four_vel.size() != 3, - "Maxwellian: Drift velocity must be a 3D vector", - HERE); - raise::ErrorIf(temperature < ZERO, - "Maxwellian: Temperature must be non-negative", - HERE); - if constexpr (M::CoordType == Coord::Cart) { - drift_4vel = NORM(drift_four_vel[0], drift_four_vel[1], drift_four_vel[2]); - if (cmp::AlmostZero_host(drift_4vel)) { - drift_dir = 0; - } else { - drift_3vel = drift_4vel / math::sqrt(ONE + SQR(drift_4vel)); - drift_dir_x1 = drift_four_vel[0] / drift_4vel; - drift_dir_x2 = drift_four_vel[1] / drift_4vel; - drift_dir_x3 = drift_four_vel[2] / drift_4vel; - - // assume drift is in an arbitrary direction - drift_dir = 4; - // check whether drift is in one of principal directions - for (auto d { 0u }; d < 3u; ++d) { - const auto dprev = (d + 2) % 3; - const auto dnext = (d + 1) % 3; - if (cmp::AlmostZero_host(drift_four_vel[dprev]) and - cmp::AlmostZero_host(drift_four_vel[dnext])) { - drift_dir = 
SIGN(drift_four_vel[d]) * (d + 1); - break; - } + if constexpr (M::CoordType == Coord::Cart) { + drift_4vel = NORM(drift_four_vel[0], drift_four_vel[1], drift_four_vel[2]); + if (cmp::AlmostZero_host(drift_4vel)) { + drift_dir = 0; + } else { + drift_3vel = drift_4vel / math::sqrt(ONE + SQR(drift_4vel)); + drift_dir_x1 = drift_four_vel[0] / drift_4vel; + drift_dir_x2 = drift_four_vel[1] / drift_4vel; + drift_dir_x3 = drift_four_vel[2] / drift_4vel; + + // assume drift is in an arbitrary direction + drift_dir = 4; + // check whether drift is in one of principal directions + for (auto d { 0u }; d < 3u; ++d) { + const auto dprev = (d + 2) % 3; + const auto dnext = (d + 1) % 3; + if (cmp::AlmostZero_host(drift_four_vel[dprev]) and + cmp::AlmostZero_host(drift_four_vel[dnext])) { + drift_dir = SIGN(drift_four_vel[d]) * (d + 1); + break; } } - raise::ErrorIf(drift_dir > 3 and drift_dir != 4, - "Maxwellian: Incorrect drift direction", - HERE); - raise::ErrorIf( - drift_dir != 0 and (M::CoordType != Coord::Cart), - "Maxwellian: Boosting is only supported in Cartesian coordinates", - HERE); } + raise::ErrorIf(drift_dir > 3 and drift_dir != 4, + "Maxwellian: Incorrect drift direction", + HERE); + raise::ErrorIf( + drift_dir != 0 and (M::CoordType != Coord::Cart), + "Maxwellian: Boosting is only supported in Cartesian coordinates", + HERE); } + } - Inline void operator()(const coord_t& x_Code, - vec_t& v, - spidx_t = 0) const { - if (cmp::AlmostZero(temperature)) { - v[0] = ZERO; - v[1] = ZERO; - v[2] = ZERO; - } else { - JuttnerSinge(v, temperature, pool); - } - // @note: boost only when using cartesian coordinates - if constexpr (M::CoordType == Coord::Cart) { - if (drift_dir != 0) { - // Boost an isotropic Maxwellian with a drift velocity using - // flipping method https://arxiv.org/pdf/1504.03910.pdf - // 1. 
apply drift in X1 direction - const auto gamma { U2GAMMA(v[0], v[1], v[2]) }; - auto rand_gen = pool.get_state(); - if (-drift_3vel * v[0] > gamma * Random(rand_gen)) { - v[0] = -v[0]; - } - pool.free_state(rand_gen); - v[0] = math::sqrt(ONE + SQR(drift_4vel)) * (v[0] + drift_3vel * gamma); - // 2. rotate to desired orientation - if (drift_dir == -1) { - v[0] = -v[0]; - } else if (drift_dir == 2 || drift_dir == -2) { - const auto tmp = v[1]; - v[1] = drift_dir > 0 ? v[0] : -v[0]; - v[0] = tmp; - } else if (drift_dir == 3 || drift_dir == -3) { - const auto tmp = v[2]; - v[2] = drift_dir > 0 ? v[0] : -v[0]; - v[0] = tmp; - } else if (drift_dir == 4) { - vec_t v_old; - v_old[0] = v[0]; - v_old[1] = v[1]; - v_old[2] = v[2]; - - v[0] = v_old[0] * drift_dir_x1 - v_old[1] * drift_dir_x2 - - v_old[2] * drift_dir_x3; - v[1] = (v_old[0] * drift_dir_x2 * (drift_dir_x1 + ONE) + - v_old[1] * - (SQR(drift_dir_x1) + drift_dir_x1 + SQR(drift_dir_x3)) - - v_old[2] * drift_dir_x2 * drift_dir_x3) / - (drift_dir_x1 + ONE); - v[2] = (v_old[0] * drift_dir_x3 * (drift_dir_x1 + ONE) - - v_old[1] * drift_dir_x2 * drift_dir_x3 - - v_old[2] * (-drift_dir_x1 + SQR(drift_dir_x3) - ONE)) / - (drift_dir_x1 + ONE); - } + Inline void operator()(const coord_t& x_Code, + vec_t& v, + spidx_t = 0) const { + if (cmp::AlmostZero(temperature)) { + v[0] = ZERO; + v[1] = ZERO; + v[2] = ZERO; + } else { + JuttnerSinge(v, temperature, pool); + } + // @note: boost only when using cartesian coordinates + if constexpr (M::CoordType == Coord::Cart) { + if (drift_dir != 0) { + // Boost an isotropic Maxwellian with a drift velocity using + // flipping method https://arxiv.org/pdf/1504.03910.pdf + // 1. apply drift in X1 direction + const auto gamma { U2GAMMA(v[0], v[1], v[2]) }; + auto rand_gen = pool.get_state(); + if (-drift_3vel * v[0] > gamma * Random(rand_gen)) { + v[0] = -v[0]; + } + pool.free_state(rand_gen); + v[0] = math::sqrt(ONE + SQR(drift_4vel)) * (v[0] + drift_3vel * gamma); + // 2. 
rotate to desired orientation + if (drift_dir == -1) { + v[0] = -v[0]; + } else if (drift_dir == 2 || drift_dir == -2) { + const auto tmp = v[1]; + v[1] = drift_dir > 0 ? v[0] : -v[0]; + v[0] = tmp; + } else if (drift_dir == 3 || drift_dir == -3) { + const auto tmp = v[2]; + v[2] = drift_dir > 0 ? v[0] : -v[0]; + v[0] = tmp; + } else if (drift_dir == 4) { + vec_t v_old; + v_old[0] = v[0]; + v_old[1] = v[1]; + v_old[2] = v[2]; + + v[0] = v_old[0] * drift_dir_x1 - v_old[1] * drift_dir_x2 - + v_old[2] * drift_dir_x3; + v[1] = (v_old[0] * drift_dir_x2 * (drift_dir_x1 + ONE) + + v_old[1] * + (SQR(drift_dir_x1) + drift_dir_x1 + SQR(drift_dir_x3)) - + v_old[2] * drift_dir_x2 * drift_dir_x3) / + (drift_dir_x1 + ONE); + v[2] = (v_old[0] * drift_dir_x3 * (drift_dir_x1 + ONE) - + v_old[1] * drift_dir_x2 * drift_dir_x3 - + v_old[2] * (-drift_dir_x1 + SQR(drift_dir_x3) - ONE)) / + (drift_dir_x1 + ONE); } } } + } - private: - random_number_pool_t pool; - - const real_t temperature; - - real_t drift_3vel { ZERO }, drift_4vel { ZERO }; - // components of the unit vector in the direction of the drift - real_t drift_dir_x1 { ZERO }, drift_dir_x2 { ZERO }, drift_dir_x3 { ZERO }; + private: + random_number_pool_t pool; - // values of boost_dir: - // 4 -> arbitrary direction - // 0 -> no drift - // +/- 1 -> +/- x1 - // +/- 2 -> +/- x2 - // +/- 3 -> +/- x3 - short drift_dir { 0 }; - }; + const real_t temperature; - } // namespace experimental + real_t drift_3vel { ZERO }, drift_4vel { ZERO }; + // components of the unit vector in the direction of the drift + real_t drift_dir_x1 { ZERO }, drift_dir_x2 { ZERO }, drift_dir_x3 { ZERO }; + + // values of boost_dir: + // 4 -> arbitrary direction + // 0 -> no drift + // +/- 1 -> +/- x1 + // +/- 2 -> +/- x2 + // +/- 3 -> +/- x3 + short drift_dir { 0 }; + }; } // namespace arch diff --git a/src/archetypes/particle_injector.h b/src/archetypes/particle_injector.h index 6313031d1..f92f6defa 100644 --- a/src/archetypes/particle_injector.h +++ 
b/src/archetypes/particle_injector.h @@ -28,7 +28,6 @@ #include "framework/domain/metadomain.h" #include "kernels/injectors.hpp" -#include "kernels/particle_moments.hpp" #include "kernels/utils.hpp" #include @@ -117,149 +116,146 @@ namespace arch { } }; - template class ED> - struct UniformInjector : BaseInjector { - using energy_dist_t = ED; - static_assert(M::is_metric, "M must be a metric class"); - static_assert(energy_dist_t::is_energy_dist, - "E must be an energy distribution class"); - static constexpr bool is_uniform_injector { true }; - static constexpr Dimension D { M::Dim }; - static constexpr Coord C { M::CoordType }; - - const energy_dist_t energy_dist; - const std::pair species; - - UniformInjector(const energy_dist_t& energy_dist, - const std::pair& species) - : energy_dist { energy_dist } - , species { species } {} - - ~UniformInjector() = default; - }; - - template class ED> - struct KeepConstantInjector : UniformInjector { - using energy_dist_t = ED; - using UniformInjector::D; - using UniformInjector::C; - - const idx_t density_buff_idx; - boundaries_t probe_box; - - KeepConstantInjector(const energy_dist_t& energy_dist, - const std::pair& species, - idx_t density_buff_idx, - boundaries_t box = {}) - : UniformInjector { energy_dist, species } - , density_buff_idx { density_buff_idx } { - for (auto d { 0u }; d < M::Dim; ++d) { - if (d < box.size()) { - probe_box.push_back({ box[d].first, box[d].second }); - } else { - probe_box.push_back(Range::All); - } - } - } - - ~KeepConstantInjector() = default; - - auto ComputeAvgDensity(const SimulationParams& params, - const Domain& domain) const -> real_t { - const auto result = this->DeduceRegion(domain, probe_box); - const auto should_probe = std::get<0>(result); - if (not should_probe) { - return ZERO; - } - const auto xi_min_arr = std::get<1>(result); - const auto xi_max_arr = std::get<2>(result); - - tuple_t i_min { 0 }; - tuple_t i_max { 0 }; - - auto xi_min_h = 
Kokkos::create_mirror_view(xi_min_arr); - auto xi_max_h = Kokkos::create_mirror_view(xi_max_arr); - Kokkos::deep_copy(xi_min_h, xi_min_arr); - Kokkos::deep_copy(xi_max_h, xi_max_arr); - - ncells_t num_cells = 1u; - for (auto d { 0u }; d < M::Dim; ++d) { - i_min[d] = std::floor(xi_min_h(d)) + N_GHOSTS; - i_max[d] = std::ceil(xi_max_h(d)) + N_GHOSTS; - num_cells *= (i_max[d] - i_min[d]); - } - - real_t dens { ZERO }; - if (should_probe) { - Kokkos::parallel_reduce( - "AvgDensity", - CreateRangePolicy(i_min, i_max), - kernel::ComputeSum_kernel(domain.fields.buff, density_buff_idx), - dens); - } -#if defined(MPI_ENABLED) - real_t tot_dens { ZERO }; - ncells_t tot_num_cells { 0 }; - MPI_Allreduce(&dens, &tot_dens, 1, mpi::get_type(), MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&num_cells, - &tot_num_cells, - 1, - mpi::get_type(), - MPI_SUM, - MPI_COMM_WORLD); - dens = tot_dens; - num_cells = tot_num_cells; -#endif - if (num_cells > 0) { - return dens / (real_t)(num_cells); - } else { - return ZERO; - } - } - - auto ComputeNumInject(const SimulationParams& params, - const Domain& domain, - real_t number_density, - const boundaries_t& box) const - -> std::tuple, array_t> override { - const auto computed_avg_density = ComputeAvgDensity(params, domain); - - const auto result = this->DeduceRegion(domain, box); - if (not std::get<0>(result)) { - return { false, (npart_t)0, array_t {}, array_t {} }; - } - - const auto xi_min = std::get<1>(result); - const auto xi_max = std::get<2>(result); - auto xi_min_h = Kokkos::create_mirror_view(xi_min); - auto xi_max_h = Kokkos::create_mirror_view(xi_max); - Kokkos::deep_copy(xi_min_h, xi_min); - Kokkos::deep_copy(xi_max_h, xi_max); - - long double num_cells { 1.0 }; - for (auto d { 0u }; d < M::Dim; ++d) { - num_cells *= static_cast(xi_max_h(d)) - - static_cast(xi_min_h(d)); - } - - const auto ppc0 = params.template get("particles.ppc0"); - npart_t nparticles { 0u }; - if (number_density > computed_avg_density) { - nparticles = 
static_cast( - (long double)(ppc0 * (number_density - computed_avg_density) * 0.5) * - num_cells); - } - - return { nparticles != 0u, nparticles, xi_min, xi_max }; - } - }; + // template + // class ED> struct UniformInjector : BaseInjector { + // using energy_dist_t = ED; + // static_assert(M::is_metric, "M must be a metric class"); + // static_assert(energy_dist_t::is_energy_dist, + // "E must be an energy distribution class"); + // static constexpr bool is_uniform_injector { true }; + // static constexpr Dimension D { M::Dim }; + // static constexpr Coord C { M::CoordType }; + // + // const energy_dist_t energy_dist; + // const std::pair species; + // + // UniformInjector(const energy_dist_t& energy_dist, + // const std::pair& species) + // : energy_dist { energy_dist } + // , species { species } {} + // + // ~UniformInjector() = default; + // }; + + // template class ED> + // struct KeepConstantInjector : UniformInjector { + // using energy_dist_t = ED; + // using UniformInjector::D; + // using UniformInjector::C; + // + // const idx_t density_buff_idx; + // boundaries_t probe_box; + // + // KeepConstantInjector(const energy_dist_t& energy_dist, + // const std::pair& species, + // idx_t density_buff_idx, boundaries_t box = {}) + // : UniformInjector { energy_dist, species } + // , density_buff_idx { density_buff_idx } { + // for (auto d { 0u }; d < M::Dim; ++d) { + // if (d < box.size()) { + // probe_box.push_back({ box[d].first, box[d].second }); + // } else { + // probe_box.push_back(Range::All); + // } + // } + // } + // + // ~KeepConstantInjector() = default; + // + // auto ComputeAvgDensity(const SimulationParams& params, + // const Domain& domain) const -> real_t { + // const auto result = this->DeduceRegion(domain, probe_box); + // const auto should_probe = std::get<0>(result); + // if (not should_probe) { + // return ZERO; + // } + // const auto xi_min_arr = std::get<1>(result); + // const auto xi_max_arr = std::get<2>(result); + // + // tuple_t i_min { 
0 }; + // tuple_t i_max { 0 }; + // + // auto xi_min_h = Kokkos::create_mirror_view(xi_min_arr); + // auto xi_max_h = Kokkos::create_mirror_view(xi_max_arr); + // Kokkos::deep_copy(xi_min_h, xi_min_arr); + // Kokkos::deep_copy(xi_max_h, xi_max_arr); + // + // ncells_t num_cells = 1u; + // for (auto d { 0u }; d < M::Dim; ++d) { + // i_min[d] = std::floor(xi_min_h(d)) + N_GHOSTS; + // i_max[d] = std::ceil(xi_max_h(d)) + N_GHOSTS; + // num_cells *= (i_max[d] - i_min[d]); + // } + // + // real_t dens { ZERO }; + // if (should_probe) { + // Kokkos::parallel_reduce( + // "AvgDensity", + // CreateRangePolicy(i_min, i_max), + // kernel::ComputeSum_kernel(domain.fields.buff, density_buff_idx), + // dens); + // } + // #if defined(MPI_ENABLED) + // real_t tot_dens { ZERO }; + // ncells_t tot_num_cells { 0 }; + // MPI_Allreduce(&dens, &tot_dens, 1, mpi::get_type(), MPI_SUM, MPI_COMM_WORLD); + // MPI_Allreduce(&num_cells, + // &tot_num_cells, + // 1, + // mpi::get_type(), + // MPI_SUM, + // MPI_COMM_WORLD); + // dens = tot_dens; + // num_cells = tot_num_cells; + // #endif + // if (num_cells > 0) { + // return dens / (real_t)(num_cells); + // } else { + // return ZERO; + // } + // } + // + // auto ComputeNumInject(const SimulationParams& params, + // const Domain& domain, + // real_t number_density, + // const boundaries_t& box) const + // -> std::tuple, array_t> override { + // const auto computed_avg_density = ComputeAvgDensity(params, domain); + // + // const auto result = this->DeduceRegion(domain, box); + // if (not std::get<0>(result)) { + // return { false, (npart_t)0, array_t {}, array_t {} }; + // } + // + // const auto xi_min = std::get<1>(result); + // const auto xi_max = std::get<2>(result); + // auto xi_min_h = Kokkos::create_mirror_view(xi_min); + // auto xi_max_h = Kokkos::create_mirror_view(xi_max); + // Kokkos::deep_copy(xi_min_h, xi_min); + // Kokkos::deep_copy(xi_max_h, xi_max); + // + // long double num_cells { 1.0 }; + // for (auto d { 0u }; d < M::Dim; ++d) 
{ + // num_cells *= static_cast(xi_max_h(d)) - + // static_cast(xi_min_h(d)); + // } + // + // const auto ppc0 = params.template get("particles.ppc0"); + // npart_t nparticles { 0u }; + // if (number_density > computed_avg_density) { + // nparticles = static_cast( + // (long double)(ppc0 * (number_density - computed_avg_density) * 0.5) * + // num_cells); + // } + // + // return { nparticles != 0u, nparticles, xi_min, xi_max }; + // } + // }; template - class ED, - template - class SD> + template class ED, + template class SD> struct NonUniformInjector { using energy_dist_t = ED; using spatial_dist_t = SD; @@ -426,6 +422,117 @@ namespace arch { ~MovingInjector() = default; }; + // /** + // * @brief Injects uniform number density of particles everywhere in the domain + // * @param domain Domain object + // * @param injector Uniform injector object + // * @param number_density Total number density (in units of n0) + // * @param use_weights Use weights + // * @param box Region to inject the particles in global coords + // * @tparam S Simulation engine type + // * @tparam M Metric type + // * @tparam I Injector type + // */ + // template + // inline void InjectUniform(const SimulationParams& params, + // Domain& domain, + // const I& injector, + // real_t number_density, + // bool use_weights = false, + // const boundaries_t& box = {}) { + // static_assert(M::is_metric, "M must be a metric class"); + // static_assert(I::is_uniform_injector, "I must be a uniform injector class"); + // raise::ErrorIf((M::CoordType != Coord::Cart) && (not use_weights), + // "Weights must be used for non-Cartesian coordinates", + // HERE); + // raise::ErrorIf((M::CoordType == Coord::Cart) && use_weights, + // "Weights should not be used for Cartesian coordinates", + // HERE); + // raise::ErrorIf(params.template get("particles.use_weights") != use_weights, + // "Weights must be enabled from the input file to use them in " + // "the injector", + // HERE); + // if 
(domain.species[injector.species.first - 1].charge() + + // domain.species[injector.species.second - 1].charge() != + // 0.0f) { + // raise::Warning("Total charge of the injected species is non-zero", HERE); + // } + // + // { + // boundaries_t nonempty_box; + // for (auto d { 0u }; d < M::Dim; ++d) { + // if (d < box.size()) { + // nonempty_box.push_back({ box[d].first, box[d].second }); + // } else { + // nonempty_box.push_back(Range::All); + // } + // } + // const auto result = injector.ComputeNumInject(params, + // domain, + // number_density, + // nonempty_box); + // if (not std::get<0>(result)) { + // return; + // } + // const auto nparticles = std::get<1>(result); + // const auto xi_min = std::get<2>(result); + // const auto xi_max = std::get<3>(result); + // + // Kokkos::parallel_for( + // "InjectUniform", + // nparticles, + // kernel::UniformInjector_kernel( + // injector.species.first, + // injector.species.second, + // domain.species[injector.species.first - 1], + // domain.species[injector.species.second - 1], + // domain.species[injector.species.first - 1].npart(), + // domain.species[injector.species.second - 1].npart(), + // domain.mesh.metric, + // xi_min, + // xi_max, + // injector.energy_dist, + // ONE / params.template get("scales.V0"), + // domain.random_pool)); + // domain.species[injector.species.first - 1].set_npart( + // domain.species[injector.species.first - 1].npart() + nparticles); + // domain.species[injector.species.second - 1].set_npart( + // domain.species[injector.species.second - 1].npart() + nparticles); + // } + // } + // + // namespace experimental { + + template class ED1, + template class ED2> + struct UniformInjector : BaseInjector { + using energy_dist_1_t = ED1; + using energy_dist_2_t = ED2; + static_assert(M::is_metric, "M must be a metric class"); + static_assert(energy_dist_1_t::is_energy_dist, + "ED1 must be an energy distribution class"); + static_assert(energy_dist_2_t::is_energy_dist, + "ED2 must be an energy 
distribution class"); + static constexpr bool is_uniform_injector { true }; + static constexpr Dimension D { M::Dim }; + static constexpr Coord C { M::CoordType }; + + const energy_dist_1_t energy_dist_1; + const energy_dist_2_t energy_dist_2; + const std::pair species; + + UniformInjector(const energy_dist_1_t& energy_dist_1, + const energy_dist_2_t& energy_dist_2, + const std::pair& species) + : energy_dist_1 { energy_dist_1 } + , energy_dist_2 { energy_dist_2 } + , species { species } {} + + ~UniformInjector() = default; + }; + /** * @brief Injects uniform number density of particles everywhere in the domain * @param domain Domain object @@ -485,17 +592,20 @@ namespace arch { Kokkos::parallel_for( "InjectUniform", nparticles, - kernel::UniformInjector_kernel( + kernel::UniformInjector_kernel( injector.species.first, injector.species.second, domain.species[injector.species.first - 1], domain.species[injector.species.second - 1], + nparticles, + domain.index(), domain.species[injector.species.first - 1].npart(), domain.species[injector.species.second - 1].npart(), domain.mesh.metric, xi_min, xi_max, - injector.energy_dist, + injector.energy_dist_1, + injector.energy_dist_2, ONE / params.template get("scales.V0"), domain.random_pool)); domain.species[injector.species.first - 1].set_npart( @@ -505,123 +615,7 @@ namespace arch { } } - namespace experimental { - - template - class ED1, - template - class ED2> - struct UniformInjector : BaseInjector { - using energy_dist_1_t = ED1; - using energy_dist_2_t = ED2; - static_assert(M::is_metric, "M must be a metric class"); - static_assert(energy_dist_1_t::is_energy_dist, - "ED1 must be an energy distribution class"); - static_assert(energy_dist_2_t::is_energy_dist, - "ED2 must be an energy distribution class"); - static constexpr bool is_uniform_injector { true }; - static constexpr Dimension D { M::Dim }; - static constexpr Coord C { M::CoordType }; - - const energy_dist_1_t energy_dist_1; - const energy_dist_2_t 
energy_dist_2; - const std::pair species; - - UniformInjector(const energy_dist_1_t& energy_dist_1, - const energy_dist_2_t& energy_dist_2, - const std::pair& species) - : energy_dist_1 { energy_dist_1 } - , energy_dist_2 { energy_dist_2 } - , species { species } {} - - ~UniformInjector() = default; - }; - - /** - * @brief Injects uniform number density of particles everywhere in the domain - * @param domain Domain object - * @param injector Uniform injector object - * @param number_density Total number density (in units of n0) - * @param use_weights Use weights - * @param box Region to inject the particles in global coords - * @tparam S Simulation engine type - * @tparam M Metric type - * @tparam I Injector type - */ - template - inline void InjectUniform(const SimulationParams& params, - Domain& domain, - const I& injector, - real_t number_density, - bool use_weights = false, - const boundaries_t& box = {}) { - static_assert(M::is_metric, "M must be a metric class"); - static_assert(I::is_uniform_injector, "I must be a uniform injector class"); - raise::ErrorIf((M::CoordType != Coord::Cart) && (not use_weights), - "Weights must be used for non-Cartesian coordinates", - HERE); - raise::ErrorIf((M::CoordType == Coord::Cart) && use_weights, - "Weights should not be used for Cartesian coordinates", - HERE); - raise::ErrorIf( - params.template get("particles.use_weights") != use_weights, - "Weights must be enabled from the input file to use them in " - "the injector", - HERE); - if (domain.species[injector.species.first - 1].charge() + - domain.species[injector.species.second - 1].charge() != - 0.0f) { - raise::Warning("Total charge of the injected species is non-zero", HERE); - } - - { - boundaries_t nonempty_box; - for (auto d { 0u }; d < M::Dim; ++d) { - if (d < box.size()) { - nonempty_box.push_back({ box[d].first, box[d].second }); - } else { - nonempty_box.push_back(Range::All); - } - } - const auto result = injector.ComputeNumInject(params, - domain, - 
number_density, - nonempty_box); - if (not std::get<0>(result)) { - return; - } - const auto nparticles = std::get<1>(result); - const auto xi_min = std::get<2>(result); - const auto xi_max = std::get<3>(result); - - Kokkos::parallel_for( - "InjectUniform", - nparticles, - kernel::experimental:: - UniformInjector_kernel( - injector.species.first, - injector.species.second, - domain.species[injector.species.first - 1], - domain.species[injector.species.second - 1], - domain.species[injector.species.first - 1].npart(), - domain.species[injector.species.second - 1].npart(), - domain.mesh.metric, - xi_min, - xi_max, - injector.energy_dist_1, - injector.energy_dist_2, - ONE / params.template get("scales.V0"), - domain.random_pool)); - domain.species[injector.species.first - 1].set_npart( - domain.species[injector.species.first - 1].npart() + nparticles); - domain.species[injector.species.second - 1].set_npart( - domain.species[injector.species.second - 1].npart() + nparticles); - } - } - - } // namespace experimental + // } // namespace experimental /** * @brief Injects particles from a globally-defined map diff --git a/src/archetypes/utils.h b/src/archetypes/utils.h index d3d6ae475..7a5296771 100644 --- a/src/archetypes/utils.h +++ b/src/archetypes/utils.h @@ -51,29 +51,26 @@ namespace arch { const auto temperature_1 = temperatures.first / mass_1; const auto temperature_2 = temperatures.second / mass_2; - const auto maxwellian_1 = arch::experimental::Maxwellian( - domain.mesh.metric, - domain.random_pool, - temperature_1, - drift_four_vels.first); - const auto maxwellian_2 = arch::experimental::Maxwellian( - domain.mesh.metric, - domain.random_pool, - temperature_2, - drift_four_vels.second); + const auto maxwellian_1 = arch::Maxwellian(domain.mesh.metric, + domain.random_pool, + temperature_1, + drift_four_vels.first); + const auto maxwellian_2 = arch::Maxwellian(domain.mesh.metric, + domain.random_pool, + temperature_2, + drift_four_vels.second); - const auto 
injector = arch::experimental:: - UniformInjector( - maxwellian_1, - maxwellian_2, - species); + const auto injector = arch::UniformInjector( + maxwellian_1, + maxwellian_2, + species); - arch::experimental::InjectUniform(params, - domain, - injector, - tot_number_density, - use_weights, - box); + arch::InjectUniform(params, + domain, + injector, + tot_number_density, + use_weights, + box); } /** diff --git a/src/kernels/injectors.hpp b/src/kernels/injectors.hpp index c321d18f8..69642e2fd 100644 --- a/src/kernels/injectors.hpp +++ b/src/kernels/injectors.hpp @@ -25,9 +25,60 @@ namespace kernel { using namespace ntt; - template + template + Inline void InjectParticle(const npart_t& p, + const array_t& i1_arr, + const array_t& i2_arr, + const array_t& i3_arr, + const array_t& dx1_arr, + const array_t& dx2_arr, + const array_t& dx3_arr, + const array_t& ux1_arr, + const array_t& ux2_arr, + const array_t& ux3_arr, + const array_t& phi_arr, + const array_t& weight_arr, + const array_t& tag_arr, + const array_t& pld_i_arr, + const tuple_t& xi_Cd, + const tuple_t& dxi_Cd, + const vec_t& v_Cd, + const real_t& weight = ONE, + const real_t& phi = ZERO, + const npart_t& domain_idx = 0u, + const npart_t& species_cntr = 0u) { + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + i1_arr(p) = xi_Cd[0]; + dx1_arr(p) = dxi_Cd[0]; + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + i2_arr(p) = xi_Cd[1]; + dx2_arr(p) = dxi_Cd[1]; + } + if constexpr (D == Dim::_3D) { + i3_arr(p) = xi_Cd[2]; + dx3_arr(p) = dxi_Cd[2]; + } + if constexpr (D == Dim::_2D and C != Coord::Cart) { + phi_arr(p) = phi; + } + ux1_arr(p) = v_Cd[0]; + ux2_arr(p) = v_Cd[1]; + ux3_arr(p) = v_Cd[2]; + tag_arr(p) = ParticleTag::alive; + weight_arr(p) = weight; + if constexpr (T) { + pld_i_arr(p, pldi::spcCtr) = species_cntr; +#if defined(MPI_ENABLED) + pld_i_arr(p, pldi::domIdx) = domain_idx; +#endif + } + } + + template struct UniformInjector_kernel { - static_assert(ED::is_energy_dist, "ED must be 
an energy distribution class"); + static_assert(ED1::is_energy_dist, "ED1 must be an energy distribution class"); + static_assert(ED2::is_energy_dist, "ED2 must be an energy distribution class"); static_assert(M::is_metric, "M must be a metric class"); const spidx_t spidx1, spidx2; @@ -38,6 +89,7 @@ namespace kernel { array_t phis_1; array_t weights_1; array_t tags_1; + array_t pldis_1; array_t i1s_2, i2s_2, i3s_2; array_t dx1s_2, dx2s_2, dx3s_2; @@ -45,11 +97,15 @@ namespace kernel { array_t phis_2; array_t weights_2; array_t tags_2; + array_t pldis_2; npart_t offset1, offset2; + npart_t domain_idx, cntr1, cntr2; + bool use_tracking_1, use_tracking_2; const M metric; const array_t xi_min, xi_max; - const ED energy_dist; + const ED1 energy_dist_1; + const ED2 energy_dist_2; const real_t inv_V0; random_number_pool_t random_pool; @@ -57,12 +113,15 @@ namespace kernel { spidx_t spidx2, Particles& species1, Particles& species2, + npart_t inject_npart, + npart_t domain_idx, npart_t offset1, npart_t offset2, const M& metric, const array_t& xi_min, const array_t& xi_max, - const ED& energy_dist, + const ED1& energy_dist_1, + const ED2& energy_dist_2, real_t inv_V0, random_number_pool_t& random_pool) : spidx1 { spidx1 } @@ -79,6 +138,7 @@ namespace kernel { , phis_1 { species1.phi } , weights_1 { species1.weight } , tags_1 { species1.tag } + , pldis_1 { species1.pld_i } , i1s_2 { species2.i1 } , i2s_2 { species2.i2 } , i3s_2 { species2.i3 } @@ -91,26 +151,80 @@ namespace kernel { , phis_2 { species2.phi } , weights_2 { species2.weight } , tags_2 { species2.tag } + , pldis_2 { species2.pld_i } , offset1 { offset1 } , offset2 { offset2 } + , use_tracking_1 { species1.use_tracking() } + , use_tracking_2 { species2.use_tracking() } + , domain_idx { domain_idx } + , cntr1 { species1.counter() } + , cntr2 { species2.counter() } , metric { metric } , xi_min { xi_min } , xi_max { xi_max } - , energy_dist { energy_dist } + , energy_dist_1 { energy_dist_1 } + , energy_dist_2 { 
energy_dist_2 } , inv_V0 { inv_V0 } - , random_pool { random_pool } {} + , random_pool { random_pool } { + if (use_tracking_1) { + printf("using tracking for species #1\n"); + species1.set_counter(cntr1 + inject_npart); +#if !defined(MPI_ENABLED) + raise::ErrorIf(species1.pld_i.extent(1) < 1, + "Particle tracking is enabled but the " + "particle integer payload size is less " + "than 1", + HERE); +#else + raise::ErrorIf(species1.pld_i.extent(1) < 2, + "Particle tracking is enabled but the " + "particle integer payload size is less " + "than 2", + HERE); +#endif + } + if (use_tracking_2) { + printf("using tracking for species #2\n"); + species2.set_counter(cntr2 + inject_npart); +#if !defined(MPI_ENABLED) + raise::ErrorIf(species2.pld_i.extent(1) < 1, + "Particle tracking is enabled but the " + "particle integer payload size is less " + "than 1", + HERE); +#else + raise::ErrorIf(species2.pld_i.extent(1) < 2, + "Particle tracking is enabled but the " + "particle integer payload size is less " + "than 2", + HERE); +#endif + } + } Inline void operator()(index_t p) const { - coord_t x_Cd { ZERO }; - vec_t v1 { ZERO }, v2 { ZERO }; + coord_t x_Cd { ZERO }; + tuple_t xi_Cd { 0 }; + tuple_t dxi_Cd { static_cast(0) }; + vec_t v1 { ZERO }, v2 { ZERO }; { // generate a random coordinate auto rand_gen = random_pool.get_state(); - x_Cd[0] = xi_min(0) + Random(rand_gen) * (xi_max(0) - xi_min(0)); + if constexpr (M::Dim == Dim::_1D or M::Dim == Dim::_2D or + M::Dim == Dim::_3D) { + x_Cd[0] = xi_min(0) + Random(rand_gen) * (xi_max(0) - xi_min(0)); + xi_Cd[0] = static_cast(x_Cd[0]); + dxi_Cd[0] = static_cast(x_Cd[0] - xi_Cd[0]); + } if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { x_Cd[1] = xi_min(1) + Random(rand_gen) * (xi_max(1) - xi_min(1)); + xi_Cd[1] = static_cast(x_Cd[1]); + xi_Cd[1] = static_cast(x_Cd[1]); + dxi_Cd[1] = static_cast(x_Cd[1] - xi_Cd[1]); } if constexpr (M::Dim == Dim::_3D) { x_Cd[2] = xi_min(2) + Random(rand_gen) * (xi_max(2) - xi_min(2)); + xi_Cd[2] 
= static_cast(x_Cd[2]); + dxi_Cd[2] = static_cast(x_Cd[2] - xi_Cd[2]); } random_pool.free_state(rand_gen); } @@ -118,241 +232,72 @@ namespace kernel { coord_t x_Ph { ZERO }; metric.template convert(x_Cd, x_Ph); if constexpr (M::CoordType == Coord::Cart) { - energy_dist(x_Ph, v1, spidx1); - energy_dist(x_Ph, v2, spidx2); + energy_dist_1(x_Ph, v1, spidx1); + energy_dist_2(x_Ph, v2, spidx2); } else if constexpr (S == SimEngine::SRPIC) { coord_t x_Cd_ { ZERO }; x_Cd_[0] = x_Cd[0]; x_Cd_[1] = x_Cd[1]; x_Cd_[2] = ZERO; // phi = 0 vec_t v_Ph { ZERO }; - energy_dist(x_Ph, v_Ph, spidx1); + energy_dist_1(x_Ph, v_Ph, spidx1); metric.template transform_xyz(x_Cd_, v_Ph, v1); - energy_dist(x_Ph, v_Ph, spidx2); + energy_dist_2(x_Ph, v_Ph, spidx2); metric.template transform_xyz(x_Cd_, v_Ph, v2); } else if constexpr (S == SimEngine::GRPIC) { vec_t v_Ph { ZERO }; - energy_dist(x_Ph, v_Ph, spidx1); + energy_dist_1(x_Ph, v_Ph, spidx1); metric.template transform(x_Cd, v_Ph, v1); - energy_dist(x_Ph, v_Ph, spidx2); + energy_dist_2(x_Ph, v_Ph, spidx2); metric.template transform(x_Cd, v_Ph, v2); } else { raise::KernelError(HERE, "Unknown simulation engine"); } } - // inject - i1s_1(p + offset1) = static_cast(x_Cd[0]); - dx1s_1(p + offset1) = static_cast( - x_Cd[0] - static_cast(i1s_1(p + offset1))); - i1s_2(p + offset2) = i1s_1(p + offset1); - dx1s_2(p + offset2) = dx1s_1(p + offset1); - if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { - i2s_1(p + offset1) = static_cast(x_Cd[1]); - dx2s_1(p + offset1) = static_cast( - x_Cd[1] - static_cast(i2s_1(p + offset1))); - i2s_2(p + offset2) = i2s_1(p + offset1); - dx2s_2(p + offset2) = dx2s_1(p + offset1); - if constexpr (S == SimEngine::SRPIC && M::CoordType != Coord::Cart) { - phis_1(p + offset1) = ZERO; - phis_2(p + offset2) = ZERO; - } + // clang-format off + real_t weight = ONE; + if constexpr (M::CoordType != Coord::Cart) { + const auto sqrt_det_h = metric.sqrt_det_h(x_Cd); + weight = sqrt_det_h * inv_V0; } - if constexpr (M::Dim == 
Dim::_3D) { - i3s_1(p + offset1) = static_cast(x_Cd[2]); - dx3s_1(p + offset1) = static_cast( - x_Cd[2] - static_cast(i3s_1(p + offset1))); - i3s_2(p + offset2) = i3s_1(p + offset1); - dx3s_2(p + offset2) = dx3s_1(p + offset1); + if (not use_tracking_1) { + InjectParticle( + p + offset1, + i1s_1, i2s_1, i3s_1, + dx1s_1, dx2s_1, dx3s_1, + ux1s_1, ux2s_1, ux3s_1, + phis_1, weights_1, tags_1, pldis_1, + xi_Cd, dxi_Cd, v1, weight, ZERO); + } else { + InjectParticle( + p + offset1, + i1s_1, i2s_1, i3s_1, + dx1s_1, dx2s_1, dx3s_1, + ux1s_1, ux2s_1, ux3s_1, + phis_1, weights_1, tags_1, pldis_1, + xi_Cd, dxi_Cd, v1, weight, ZERO, domain_idx, cntr1 + p); } - ux1s_1(p + offset1) = v1[0]; - ux2s_1(p + offset1) = v1[1]; - ux3s_1(p + offset1) = v1[2]; - ux1s_2(p + offset2) = v2[0]; - ux2s_2(p + offset2) = v2[1]; - ux3s_2(p + offset2) = v2[2]; - tags_1(p + offset1) = ParticleTag::alive; - tags_2(p + offset2) = ParticleTag::alive; - if constexpr (M::CoordType == Coord::Cart) { - weights_1(p + offset1) = ONE; - weights_2(p + offset2) = ONE; + if (not use_tracking_2) { + InjectParticle( + p + offset2, + i1s_2, i2s_2, i3s_2, + dx1s_2, dx2s_2, dx3s_2, + ux1s_2, ux2s_2, ux3s_2, + phis_2, weights_2, tags_2, pldis_2, + xi_Cd, dxi_Cd, v2, weight, ZERO); } else { - const auto sqrt_det_h = metric.sqrt_det_h(x_Cd); - weights_1(p + offset1) = sqrt_det_h * inv_V0; - weights_2(p + offset2) = sqrt_det_h * inv_V0; + InjectParticle( + p + offset2, + i1s_2, i2s_2, i3s_2, + dx1s_2, dx2s_2, dx3s_2, + ux1s_2, ux2s_2, ux3s_2, + phis_2, weights_2, tags_2, pldis_2, + xi_Cd, dxi_Cd, v2, weight, ZERO, domain_idx, cntr2 + p); } + // clang-format on } }; // struct UniformInjector_kernel - namespace experimental { - - template - struct UniformInjector_kernel { - static_assert(ED1::is_energy_dist, - "ED1 must be an energy distribution class"); - static_assert(ED2::is_energy_dist, - "ED2 must be an energy distribution class"); - static_assert(M::is_metric, "M must be a metric class"); - - const spidx_t spidx1, 
spidx2; - - array_t i1s_1, i2s_1, i3s_1; - array_t dx1s_1, dx2s_1, dx3s_1; - array_t ux1s_1, ux2s_1, ux3s_1; - array_t phis_1; - array_t weights_1; - array_t tags_1; - - array_t i1s_2, i2s_2, i3s_2; - array_t dx1s_2, dx2s_2, dx3s_2; - array_t ux1s_2, ux2s_2, ux3s_2; - array_t phis_2; - array_t weights_2; - array_t tags_2; - - npart_t offset1, offset2; - const M metric; - const array_t xi_min, xi_max; - const ED1 energy_dist_1; - const ED2 energy_dist_2; - const real_t inv_V0; - random_number_pool_t random_pool; - - UniformInjector_kernel(spidx_t spidx1, - spidx_t spidx2, - Particles& species1, - Particles& species2, - npart_t offset1, - npart_t offset2, - const M& metric, - const array_t& xi_min, - const array_t& xi_max, - const ED1& energy_dist_1, - const ED2& energy_dist_2, - real_t inv_V0, - random_number_pool_t& random_pool) - : spidx1 { spidx1 } - , spidx2 { spidx2 } - , i1s_1 { species1.i1 } - , i2s_1 { species1.i2 } - , i3s_1 { species1.i3 } - , dx1s_1 { species1.dx1 } - , dx2s_1 { species1.dx2 } - , dx3s_1 { species1.dx3 } - , ux1s_1 { species1.ux1 } - , ux2s_1 { species1.ux2 } - , ux3s_1 { species1.ux3 } - , phis_1 { species1.phi } - , weights_1 { species1.weight } - , tags_1 { species1.tag } - , i1s_2 { species2.i1 } - , i2s_2 { species2.i2 } - , i3s_2 { species2.i3 } - , dx1s_2 { species2.dx1 } - , dx2s_2 { species2.dx2 } - , dx3s_2 { species2.dx3 } - , ux1s_2 { species2.ux1 } - , ux2s_2 { species2.ux2 } - , ux3s_2 { species2.ux3 } - , phis_2 { species2.phi } - , weights_2 { species2.weight } - , tags_2 { species2.tag } - , offset1 { offset1 } - , offset2 { offset2 } - , metric { metric } - , xi_min { xi_min } - , xi_max { xi_max } - , energy_dist_1 { energy_dist_1 } - , energy_dist_2 { energy_dist_2 } - , inv_V0 { inv_V0 } - , random_pool { random_pool } {} - - Inline void operator()(index_t p) const { - coord_t x_Cd { ZERO }; - vec_t v1 { ZERO }, v2 { ZERO }; - { // generate a random coordinate - auto rand_gen = random_pool.get_state(); - x_Cd[0] = 
xi_min(0) + Random(rand_gen) * (xi_max(0) - xi_min(0)); - if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { - x_Cd[1] = xi_min(1) + - Random(rand_gen) * (xi_max(1) - xi_min(1)); - } - if constexpr (M::Dim == Dim::_3D) { - x_Cd[2] = xi_min(2) + - Random(rand_gen) * (xi_max(2) - xi_min(2)); - } - random_pool.free_state(rand_gen); - } - { // generate the velocity - coord_t x_Ph { ZERO }; - metric.template convert(x_Cd, x_Ph); - if constexpr (M::CoordType == Coord::Cart) { - energy_dist_1(x_Ph, v1, spidx1); - energy_dist_2(x_Ph, v2, spidx2); - } else if constexpr (S == SimEngine::SRPIC) { - coord_t x_Cd_ { ZERO }; - x_Cd_[0] = x_Cd[0]; - x_Cd_[1] = x_Cd[1]; - x_Cd_[2] = ZERO; // phi = 0 - vec_t v_Ph { ZERO }; - energy_dist_1(x_Ph, v_Ph, spidx1); - metric.template transform_xyz(x_Cd_, v_Ph, v1); - energy_dist_2(x_Ph, v_Ph, spidx2); - metric.template transform_xyz(x_Cd_, v_Ph, v2); - } else if constexpr (S == SimEngine::GRPIC) { - vec_t v_Ph { ZERO }; - energy_dist_1(x_Ph, v_Ph, spidx1); - metric.template transform(x_Cd, v_Ph, v1); - energy_dist_2(x_Ph, v_Ph, spidx2); - metric.template transform(x_Cd, v_Ph, v2); - } else { - raise::KernelError(HERE, "Unknown simulation engine"); - } - } - // inject - i1s_1(p + offset1) = static_cast(x_Cd[0]); - dx1s_1(p + offset1) = static_cast( - x_Cd[0] - static_cast(i1s_1(p + offset1))); - i1s_2(p + offset2) = i1s_1(p + offset1); - dx1s_2(p + offset2) = dx1s_1(p + offset1); - if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { - i2s_1(p + offset1) = static_cast(x_Cd[1]); - dx2s_1(p + offset1) = static_cast( - x_Cd[1] - static_cast(i2s_1(p + offset1))); - i2s_2(p + offset2) = i2s_1(p + offset1); - dx2s_2(p + offset2) = dx2s_1(p + offset1); - if constexpr (S == SimEngine::SRPIC && M::CoordType != Coord::Cart) { - phis_1(p + offset1) = ZERO; - phis_2(p + offset2) = ZERO; - } - } - if constexpr (M::Dim == Dim::_3D) { - i3s_1(p + offset1) = static_cast(x_Cd[2]); - dx3s_1(p + offset1) = static_cast( - x_Cd[2] - 
static_cast(i3s_1(p + offset1))); - i3s_2(p + offset2) = i3s_1(p + offset1); - dx3s_2(p + offset2) = dx3s_1(p + offset1); - } - ux1s_1(p + offset1) = v1[0]; - ux2s_1(p + offset1) = v1[1]; - ux3s_1(p + offset1) = v1[2]; - ux1s_2(p + offset2) = v2[0]; - ux2s_2(p + offset2) = v2[1]; - ux3s_2(p + offset2) = v2[2]; - tags_1(p + offset1) = ParticleTag::alive; - tags_2(p + offset2) = ParticleTag::alive; - if constexpr (M::CoordType == Coord::Cart) { - weights_1(p + offset1) = ONE; - weights_2(p + offset2) = ONE; - } else { - const auto sqrt_det_h = metric.sqrt_det_h(x_Cd); - weights_1(p + offset1) = sqrt_det_h * inv_V0; - weights_2(p + offset2) = sqrt_det_h * inv_V0; - } - } - }; // struct UniformInjector_kernel - - } // namespace experimental - template struct GlobalInjector_kernel { static_assert(M::is_metric, "M must be a metric class"); From fa26b3278f339cb5c5a1825095d6e2ef8142148b Mon Sep 17 00:00:00 2001 From: hayk Date: Fri, 24 Oct 2025 15:50:24 -0400 Subject: [PATCH 097/154] particle output moved to prtl class --- src/framework/containers/particles.h | 10 + src/framework/containers/particles_io.cpp | 308 ++++++++++++++++++++++ src/framework/domain/output.cpp | 89 ++----- src/global/global.h | 5 + src/output/particles.h | 39 --- src/output/writer.cpp | 31 +-- src/output/writer.h | 35 ++- 7 files changed, 368 insertions(+), 149 deletions(-) delete mode 100644 src/output/particles.h diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index 47758b0d9..144ca611c 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -273,6 +273,16 @@ namespace ntt { #endif #if defined(OUTPUT_ENABLED) + void OutputDeclare(adios2::IO&) const; + + template + void OutputWrite(adios2::IO&, + adios2::Engine&, + npart_t, + std::size_t, + std::size_t, + const M&); + void CheckpointDeclare(adios2::IO&) const; void CheckpointRead(adios2::IO&, adios2::Engine&, std::size_t, std::size_t); void CheckpointWrite(adios2::IO&, 
adios2::Engine&, std::size_t, std::size_t) const; diff --git a/src/framework/containers/particles_io.cpp b/src/framework/containers/particles_io.cpp index 70b7f2458..d1b571d71 100644 --- a/src/framework/containers/particles_io.cpp +++ b/src/framework/containers/particles_io.cpp @@ -5,10 +5,20 @@ #include "utils/formatting.h" #include "utils/log.h" +#include "metrics/kerr_schild.h" +#include "metrics/kerr_schild_0.h" +#include "metrics/minkowski.h" +#include "metrics/qkerr_schild.h" +#include "metrics/qspherical.h" +#include "metrics/spherical.h" + #include "framework/containers/particles.h" #include "output/utils/readers.h" #include "output/utils/writers.h" +#include "kernels/prtls_to_phys.hpp" + +#include #include #if defined(MPI_ENABLED) @@ -16,6 +26,275 @@ #endif namespace ntt { + /* * * * * * * * * + * Output + * * * * * * * * */ + template + void Particles::OutputDeclare(adios2::IO& io) const { + for (auto d { 0u }; d < D; ++d) { + io.DefineVariable(fmt::format("pX%d_%d", d + 1, index()), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); + } + for (auto d { 0u }; d < Dim::_3D; ++d) { + io.DefineVariable(fmt::format("pU%d_%d", d + 1, index()), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); + } + io.DefineVariable(fmt::format("pW_%d", index()), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); + if (npld_r() > 0) { + for (auto pr { 0 }; pr < npld_r(); ++pr) { + io.DefineVariable(fmt::format("pPLDR%d_%d", pr, index()), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); + } + } + auto num_track_plds = 0; + if (use_tracking()) { +#if !defined(MPI_ENABLED) + num_track_plds = 1; + io.DefineVariable(fmt::format("pIDX_%d", index()), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); +#else + num_track_plds = 2; + io.DefineVariable(fmt::format("pIDX_%d", index()), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { 
adios2::UnknownDim }); + io.DefineVariable(fmt::format("pRNK_%d", index()), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); +#endif + } + if (npld_i() > num_track_plds) { + for (auto pr { num_track_plds }; pr < npld_i(); ++pr) { + io.DefineVariable( + fmt::format("pPLDI%d_%d", pr - num_track_plds, index()), + { adios2::UnknownDim }, + { adios2::UnknownDim }, + { adios2::UnknownDim }); + } + } + } + + template + template + void Particles::OutputWrite(adios2::IO& io, + adios2::Engine& writer, + npart_t prtl_stride, + std::size_t domains_total, + std::size_t domains_offset, + const M& metric) { + if (not is_sorted()) { + RemoveDead(); + } + const npart_t nout = npart() / prtl_stride; + array_t buff_x1, buff_x2, buff_x3; + array_t buff_ux1 { "ux1", nout }; + array_t buff_ux2 { "ux2", nout }; + array_t buff_ux3 { "ux3", nout }; + array_t buff_wei { "w", nout }; + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + buff_x1 = array_t { "x1", nout }; + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + buff_x2 = array_t { "x2", nout }; + } + if constexpr (D == Dim::_3D or ((D == Dim::_2D) and (C != Coord::Cart))) { + buff_x3 = array_t { "x3", nout }; + } + array_t buff_pldr; + array_t buff_pldi; + + if (npld_r() > 0) { + buff_pldr = array_t { "pldr", nout, npld_r() }; + } + if (npld_i() > 0) { + buff_pldi = array_t { "pldi", nout, npld_i() }; + } + + if (nout > 0) { + // clang-format off + Kokkos::parallel_for( + "PrtlToPhys", + nout, + kernel::PrtlToPhys_kernel(prtl_stride, + buff_x1, buff_x2, buff_x3, + buff_ux1, buff_ux2, buff_ux3, + buff_wei, + buff_pldr, buff_pldi, + i1, i2, i3, + dx1, dx2, dx3, + ux1, ux2, ux3, + phi, weight, + pld_r, pld_i, + metric)); + // clang-format on + } + npart_t nout_offset = 0; + npart_t nout_total = nout; +#if defined(MPI_ENABLED) + auto nout_total_vec = std::vector(domains_total); + MPI_Allgather(&nout, + 1, + mpi::get_type(), + nout_total_vec.data(), + 1, + mpi::get_type(), + 
MPI_COMM_WORLD); + nout_total = 0; + for (auto r = 0; r < domains_total; ++r) { + if (r < domains_offset) { + nout_offset += nout_total_vec[r]; + } + nout_total += nout_total_vec[r]; + } +#endif // MPI_ENABLED + out::Write1DArray(io, + writer, + fmt::format("pW_%d", index()), + buff_wei, + nout, + nout_total, + nout_offset); + out::Write1DArray(io, + writer, + fmt::format("pU1_%d", index()), + buff_ux1, + nout, + nout_total, + nout_offset); + out::Write1DArray(io, + writer, + fmt::format("pU2_%d", index()), + buff_ux2, + nout, + nout_total, + nout_offset); + out::Write1DArray(io, + writer, + fmt::format("pU3_%d", index()), + buff_ux3, + nout, + nout_total, + nout_offset); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + out::Write1DArray(io, + writer, + fmt::format("pX1_%d", index()), + buff_x1, + nout, + nout_total, + nout_offset); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + out::Write1DArray(io, + writer, + fmt::format("pX2_%d", index()), + buff_x2, + nout, + nout_total, + nout_offset); + } + if constexpr (D == Dim::_3D or ((D == Dim::_2D) and (C != Coord::Cart))) { + out::Write1DArray(io, + writer, + fmt::format("pX3_%d", index()), + buff_x3, + nout, + nout_total, + nout_offset); + } + + if (npld_r() > 0) { + for (auto pr { 0 }; pr < npld_r(); ++pr) { + auto buff_sub = Kokkos::subview(buff_pldr, Kokkos::ALL, pr); + out::Write1DSubArray( + io, + writer, + fmt::format("pPLDR%d_%d", pr, index()), + buff_sub, + nout, + nout_total, + nout_offset); + } + } + auto num_track_plds = 0; + if (use_tracking()) { +#if !defined(MPI_ENABLED) + num_track_plds = 1; + { + auto buff_sub = Kokkos::subview(buff_pldi, + Kokkos::ALL, + static_cast(pldi::spcCtr)); + out::Write1DSubArray( + io, + writer, + fmt::format("pIDX_%d", pr, index()), + buff_sub, + nout, + nout_total, + nout_offset); + } +#else + num_track_plds = 2; + { + auto buff_sub = Kokkos::subview(buff_pldi, + Kokkos::ALL, + static_cast(pldi::spcCtr)); + out::Write1DSubArray( + io, + writer, 
+ fmt::format("pIDX_%d", index()), + buff_sub, + nout, + nout_total, + nout_offset); + } + { + auto buff_sub = Kokkos::subview(buff_pldi, + Kokkos::ALL, + static_cast(pldi::domIdx)); + out::Write1DSubArray( + io, + writer, + fmt::format("pRNK_%d", index()), + buff_sub, + nout, + nout_total, + nout_offset); + } +#endif + } + if (npld_i() > num_track_plds) { + for (auto pr { num_track_plds }; pr < npld_i(); ++pr) { + auto buff_sub = Kokkos::subview(buff_pldi, + Kokkos::ALL, + static_cast(pr)); + out::Write1DSubArray( + io, + writer, + fmt::format("pPLDI%d_%d", pr - num_track_plds, index()), + buff_sub, + nout, + nout_total, + nout_offset); + } + } + } + + /* * * * * * * * * + * Checkpoints + * * * * * * * * */ template void Particles::CheckpointDeclare(adios2::IO& io) const { @@ -477,6 +756,35 @@ namespace ntt { } } +#define PARTICLES_OUTPUT_DECLARE(D, C) \ + template void Particles::OutputDeclare(adios2::IO&) const; + + PARTICLES_OUTPUT_DECLARE(Dim::_1D, Coord::Cart) + PARTICLES_OUTPUT_DECLARE(Dim::_2D, Coord::Cart) + PARTICLES_OUTPUT_DECLARE(Dim::_3D, Coord::Cart) + PARTICLES_OUTPUT_DECLARE(Dim::_2D, Coord::Sph) + PARTICLES_OUTPUT_DECLARE(Dim::_2D, Coord::Qsph) +#undef PARTICLES_OUTPUT_DECLARE + +#define PARTICLES_OUTPUT_WRITE(S, M) \ + template void Particles::OutputWrite( \ + adios2::IO&, \ + adios2::Engine&, \ + npart_t, \ + std::size_t, \ + std::size_t, \ + const M&); + + PARTICLES_OUTPUT_WRITE(SimEngine::SRPIC, metric::Minkowski) + PARTICLES_OUTPUT_WRITE(SimEngine::SRPIC, metric::Minkowski) + PARTICLES_OUTPUT_WRITE(SimEngine::SRPIC, metric::Minkowski) + PARTICLES_OUTPUT_WRITE(SimEngine::SRPIC, metric::Spherical) + PARTICLES_OUTPUT_WRITE(SimEngine::SRPIC, metric::QSpherical) + PARTICLES_OUTPUT_WRITE(SimEngine::GRPIC, metric::KerrSchild) + PARTICLES_OUTPUT_WRITE(SimEngine::GRPIC, metric::QKerrSchild) + PARTICLES_OUTPUT_WRITE(SimEngine::GRPIC, metric::KerrSchild0) +#undef PARTICLES_OUTPUT_WRITE + #define PARTICLES_CHECKPOINTS(D, C) \ template void 
Particles::CheckpointDeclare(adios2::IO&) const; \ template void Particles::CheckpointRead(adios2::IO&, \ diff --git a/src/framework/domain/output.cpp b/src/framework/domain/output.cpp index 6dad452e8..0a41748d9 100644 --- a/src/framework/domain/output.cpp +++ b/src/framework/domain/output.cpp @@ -21,7 +21,6 @@ #include "kernels/divergences.hpp" #include "kernels/fields_to_phys.hpp" #include "kernels/particle_moments.hpp" -#include "kernels/prtls_to_phys.hpp" #include #include @@ -92,7 +91,13 @@ namespace ntt { if constexpr (M::CoordType != Coord::Cart) { dim = Dim::_3D; } - g_writer.defineParticleOutputs(dim, species_to_write); + g_writer.clearSpeciesIndex(); + for (const auto& s : species_to_write) { + g_writer.addSpeciesIndex(s); + } + for (const auto sp : g_writer.speciesIndices()) { + local_domain->species[sp - 1].OutputDeclare(g_writer.io()); + } // spectra write all particle species std::vector spectra_species {}; @@ -711,78 +716,14 @@ namespace ntt { g_writer.beginWriting(WriteMode::Particles, current_step, current_time); const auto prtl_stride = params.template get( "output.particles.stride"); - for (const auto& prtl : g_writer.speciesWriters()) { - auto& species = local_domain->species[prtl.species() - 1]; - if (not species.is_sorted()) { - species.RemoveDead(); - } - const npart_t nout = species.npart() / prtl_stride; - array_t buff_x1, buff_x2, buff_x3; - array_t buff_ux1 { "u1", nout }; - array_t buff_ux2 { "ux2", nout }; - array_t buff_ux3 { "ux3", nout }; - array_t buff_wei { "w", nout }; - if constexpr (M::Dim == Dim::_1D or M::Dim == Dim::_2D or - M::Dim == Dim::_3D) { - buff_x1 = array_t { "x1", nout }; - } - if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { - buff_x2 = array_t { "x2", nout }; - } - if constexpr (M::Dim == Dim::_3D or - ((D == Dim::_2D) and (M::CoordType != Coord::Cart))) { - buff_x3 = array_t { "x3", nout }; - } - if (nout > 0) { - // clang-format off - Kokkos::parallel_for( - "PrtlToPhys", - nout, - 
kernel::PrtlToPhys_kernel(prtl_stride, - buff_x1, buff_x2, buff_x3, - buff_ux1, buff_ux2, buff_ux3, - buff_wei, - species.i1, species.i2, species.i3, - species.dx1, species.dx2, species.dx3, - species.ux1, species.ux2, species.ux3, - species.phi, species.weight, - local_domain->mesh.metric)); - // clang-format on - } - npart_t offset = 0; - npart_t glob_tot = nout; -#if defined(MPI_ENABLED) - auto glob_nout = std::vector(g_ndomains); - MPI_Allgather(&nout, - 1, - mpi::get_type(), - glob_nout.data(), - 1, - mpi::get_type(), - MPI_COMM_WORLD); - glob_tot = 0; - for (auto r = 0; r < g_mpi_size; ++r) { - if (r < g_mpi_rank) { - offset += glob_nout[r]; - } - glob_tot += glob_nout[r]; - } -#endif // MPI_ENABLED - g_writer.writeParticleQuantity(buff_wei, glob_tot, offset, prtl.name("W", 0)); - g_writer.writeParticleQuantity(buff_ux1, glob_tot, offset, prtl.name("U", 1)); - g_writer.writeParticleQuantity(buff_ux2, glob_tot, offset, prtl.name("U", 2)); - g_writer.writeParticleQuantity(buff_ux3, glob_tot, offset, prtl.name("U", 3)); - if constexpr (M::Dim == Dim::_1D or M::Dim == Dim::_2D or - M::Dim == Dim::_3D) { - g_writer.writeParticleQuantity(buff_x1, glob_tot, offset, prtl.name("X", 1)); - } - if constexpr (M::Dim == Dim::_2D or M::Dim == Dim::_3D) { - g_writer.writeParticleQuantity(buff_x2, glob_tot, offset, prtl.name("X", 2)); - } - if constexpr (M::Dim == Dim::_3D or - ((D == Dim::_2D) and (M::CoordType != Coord::Cart))) { - g_writer.writeParticleQuantity(buff_x3, glob_tot, offset, prtl.name("X", 3)); - } + for (const auto spec : g_writer.speciesIndices()) { + local_domain->species[spec - 1].template OutputWrite( + g_writer.io(), + g_writer.writer(), + prtl_stride, + ndomains(), + local_domain->index(), + local_domain->mesh.metric); } g_writer.endWriting(WriteMode::Particles); } // end shouldWrite("particles", step, time) diff --git a/src/global/global.h b/src/global/global.h index adffcf6e9..0586a3720 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ 
-139,6 +139,11 @@ namespace ntt { jx3 = 2 }; + enum pldi { + spcCtr = 0, + domIdx = 1 + }; + enum ParticleTag : short { dead = 0, alive diff --git a/src/output/particles.h b/src/output/particles.h deleted file mode 100644 index 0936e66f9..000000000 --- a/src/output/particles.h +++ /dev/null @@ -1,39 +0,0 @@ -/** - * @file output/particles.h - * @brief Defines the metadata for particle output - * @implements - * - out::OutputSpecies - */ - -#ifndef OUTPUT_PARTICLES_H -#define OUTPUT_PARTICLES_H - -#include "global.h" - -#include - -namespace out { - - class OutputSpecies { - const spidx_t m_sp; - - public: - OutputSpecies(spidx_t sp) : m_sp { sp } {} - - ~OutputSpecies() = default; - - [[nodiscard]] - auto species() const -> spidx_t { - return m_sp; - } - - [[nodiscard]] - auto name(const std::string& q, unsigned short c) const -> std::string { - return "p" + q + (c == 0 ? "" : std::to_string(c)) + "_" + - std::to_string(m_sp); - } - }; - -} // namespace out - -#endif // OUTPUT_PARTICLES_H diff --git a/src/output/writer.cpp b/src/output/writer.cpp index cc9ec0eb8..42d299a67 100644 --- a/src/output/writer.cpp +++ b/src/output/writer.cpp @@ -48,9 +48,8 @@ namespace out { m_trackers.insert({ type, tools::Tracker(type, interval, interval_time) }); } - auto Writer::shouldWrite(const std::string& type, - timestep_t step, - simtime_t time) -> bool { + auto Writer::shouldWrite(const std::string& type, timestep_t step, simtime_t time) + -> bool { if (m_trackers.find(type) != m_trackers.end()) { return m_trackers.at(type).shouldWrite(step, time); } else { @@ -163,32 +162,6 @@ namespace out { } } - void Writer::defineParticleOutputs(Dimension dim, - const std::vector& specs) { - m_prtl_writers.clear(); - for (const auto& s : specs) { - m_prtl_writers.emplace_back(s); - } - for (const auto& prtl : m_prtl_writers) { - for (auto d { 0u }; d < dim; ++d) { - m_io.DefineVariable(prtl.name("X", d + 1), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); - } 
- for (auto d { 0u }; d < Dim::_3D; ++d) { - m_io.DefineVariable(prtl.name("U", d + 1), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); - } - m_io.DefineVariable(prtl.name("W", 0), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); - } - } - void Writer::defineSpectraOutputs(const std::vector& specs) { m_spectra_writers.clear(); for (const auto& s : specs) { diff --git a/src/output/writer.h b/src/output/writer.h index cc3edc733..4fe0c194f 100644 --- a/src/output/writer.h +++ b/src/output/writer.h @@ -14,7 +14,6 @@ #include "utils/tools.h" #include "output/fields.h" -#include "output/particles.h" #include "output/spectra.h" #include @@ -62,9 +61,10 @@ namespace out { std::map m_trackers; std::vector m_flds_writers; - std::vector m_prtl_writers; std::vector m_spectra_writers; + std::vector m_species_indices; + WriteModeTags m_active_mode { WriteMode::None }; public: @@ -92,7 +92,6 @@ namespace out { Coord); void defineFieldOutputs(const SimEngine&, const std::vector&); - void defineParticleOutputs(Dimension, const std::vector&); void defineSpectraOutputs(const std::vector&); void writeMesh(unsigned short, @@ -115,19 +114,41 @@ namespace out { void beginWriting(WriteModeTags, timestep_t, simtime_t); void endWriting(WriteModeTags); + void addSpeciesIndex(spidx_t idx) { + m_species_indices.push_back(idx); + } + + void clearSpeciesIndex() { + m_species_indices.clear(); + } + /* getters -------------------------------------------------------------- */ + [[nodiscard]] + auto io() -> adios2::IO& { + return m_io; + } + + [[nodiscard]] + auto writer() -> adios2::Engine& { + return m_writer; + } + + [[nodiscard]] + auto speciesIndices() const -> const std::vector& { + return m_species_indices; + } + + [[nodiscard]] auto root() const -> const path_t& { return m_root; } + [[nodiscard]] auto fieldWriters() const -> const std::vector& { return m_flds_writers; } - auto speciesWriters() const -> const std::vector& { - return 
m_prtl_writers; - } - + [[nodiscard]] auto spectraWriters() const -> const std::vector& { return m_spectra_writers; } From fce7983847f414f1f32c562f728eb8b1455fff5e Mon Sep 17 00:00:00 2001 From: hayk Date: Fri, 24 Oct 2025 15:50:37 -0400 Subject: [PATCH 098/154] writer for subview --- src/output/utils/writers.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/output/utils/writers.h b/src/output/utils/writers.h index 58fac2bf6..1da24e9ee 100644 --- a/src/output/utils/writers.h +++ b/src/output/utils/writers.h @@ -41,6 +41,33 @@ namespace out { std::size_t, std::size_t); + // template + // void Write1DSubArray(adios2::IO&, + // adios2::Engine&, + // const std::string&, + // const subarray1d_t&, + // std::size_t, + // std::size_t, + // std::size_t); + + template + void Write1DSubArray(adios2::IO& io, + adios2::Engine& writer, + const std::string& name, + const S& data, + std::size_t local_size, + std::size_t global_size, + std::size_t local_offset) { + const auto slice = range_tuple_t(0, local_size); + auto var = io.InquireVariable(name); + var.SetShape({ global_size }); + var.SetSelection(adios2::Box({ local_offset }, { local_size })); + + auto data_h = Kokkos::create_mirror_view(data); + Kokkos::deep_copy(data_h, data); + writer.Put(var, data_h.data(), adios2::Mode::Sync); + } + template void Write2DArray(adios2::IO&, adios2::Engine&, From 3a35b75320ef21903b9165e025227a8f5bf8a4bb Mon Sep 17 00:00:00 2001 From: hayk Date: Fri, 24 Oct 2025 15:50:56 -0400 Subject: [PATCH 099/154] pld passed to prtl2phys kernel --- src/kernels/prtls_to_phys.hpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/kernels/prtls_to_phys.hpp b/src/kernels/prtls_to_phys.hpp index 4dd7d88b0..dfe039e86 100644 --- a/src/kernels/prtls_to_phys.hpp +++ b/src/kernels/prtls_to_phys.hpp @@ -39,11 +39,15 @@ namespace kernel { array_t buff_ux2; array_t buff_ux3; array_t buff_wei; + array_t buff_pldr; + array_t buff_pldi; const array_t i1, 
i2, i3; const array_t dx1, dx2, dx3; const array_t ux1, ux2, ux3; const array_t phi; const array_t weight; + const array_t pld_r; + const array_t pld_i; const M metric; public: @@ -55,6 +59,8 @@ namespace kernel { array_t& buff_ux2, array_t& buff_ux3, array_t& buff_wei, + array_t& buff_pldr, + array_t& buff_pldi, const array_t& i1, const array_t& i2, const array_t& i3, @@ -66,6 +72,8 @@ namespace kernel { const array_t& ux3, const array_t& phi, const array_t& weight, + const array_t& pld_r, + const array_t& pld_i, const M& metric) : stride { stride } , buff_x1 { buff_x1 } @@ -75,6 +83,8 @@ namespace kernel { , buff_ux2 { buff_ux2 } , buff_ux3 { buff_ux3 } , buff_wei { buff_wei } + , buff_pldr { buff_pldr } + , buff_pldi { buff_pldi } , i1 { i1 } , i2 { i2 } , i3 { i3 } @@ -86,6 +96,8 @@ namespace kernel { , ux3 { ux3 } , phi { phi } , weight { weight } + , pld_r { pld_r } + , pld_i { pld_i } , metric { metric } { if constexpr ((D == Dim::_1D) || (D == Dim::_2D) || (D == Dim::_3D)) { raise::ErrorIf(buff_x1.extent(0) == 0, "Invalid buffer size", HERE); @@ -106,6 +118,7 @@ namespace kernel { bufferX(p); bufferU(p); buff_wei(p) = weight(p * stride); + bufferPlds(p); } Inline void bufferX(index_t& p) const { @@ -199,6 +212,19 @@ namespace kernel { buff_ux2(p) = u_Phys[1]; buff_ux3(p) = u_Phys[2]; } + + Inline void bufferPlds(index_t& p) const { + if (buff_pldr.extent(0) > 0) { + for (auto pr { 0u }; pr < buff_pldr.extent(1); ++pr) { + buff_pldr(p, pr) = pld_r(p * stride, pr); + } + } + if (buff_pldi.extent(0) > 0) { + for (auto pi { 0u }; pi < buff_pldi.extent(1); ++pi) { + buff_pldi(p, pi) = pld_i(p * stride, pi); + } + } + } }; } // namespace kernel From bb41984bcbd2a0fcc5d5787beb06b5ba3b7d996c Mon Sep 17 00:00:00 2001 From: hayk Date: Fri, 24 Oct 2025 15:51:10 -0400 Subject: [PATCH 100/154] minor formatting --- src/global/arch/kokkos_aliases.h | 4 ++-- src/kernels/particle_pusher_sr.hpp | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git 
a/src/global/arch/kokkos_aliases.h b/src/global/arch/kokkos_aliases.h index adb0b6451..712fc6eff 100644 --- a/src/global/arch/kokkos_aliases.h +++ b/src/global/arch/kokkos_aliases.h @@ -234,8 +234,8 @@ auto CreateParticleRangePolicy(npart_t, npart_t) -> range_t; * @returns Kokkos::RangePolicy or Kokkos::MDRangePolicy in the accelerator execution space. */ template -auto CreateRangePolicy(const tuple_t&, - const tuple_t&) -> range_t; +auto CreateRangePolicy(const tuple_t&, const tuple_t&) + -> range_t; /** * @brief Function template for generating ND Kokkos range policy on the host. diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 91bc6a760..6bd4e1714 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -30,7 +30,9 @@ /* Local macros */ /* -------------------------------------------------------------------------- */ #define from_Xi_to_i(XI, I) \ - { I = static_cast((XI + 1)) - 1; } + { \ + I = static_cast((XI + 1)) - 1; \ + } #define from_Xi_to_i_di(XI, I, DI) \ { \ From 713fd6e585326edc0b8e1eb20d7ffbc3e4b142bf Mon Sep 17 00:00:00 2001 From: haykh Date: Mon, 27 Oct 2025 10:52:54 -0400 Subject: [PATCH 101/154] prtl tracking in output --- src/framework/containers/particles_io.cpp | 119 +++++++++++++++------- src/kernels/prtls_to_phys.hpp | 106 +++++++++---------- 2 files changed, 134 insertions(+), 91 deletions(-) diff --git a/src/framework/containers/particles_io.cpp b/src/framework/containers/particles_io.cpp index d1b571d71..9131e596a 100644 --- a/src/framework/containers/particles_io.cpp +++ b/src/framework/containers/particles_io.cpp @@ -97,7 +97,56 @@ namespace ntt { if (not is_sorted()) { RemoveDead(); } - const npart_t nout = npart() / prtl_stride; + npart_t nout; + array_t out_indices; + if (!use_tracking()) { + nout = npart() / prtl_stride; + } else { + nout = 0u; + Kokkos::parallel_reduce( + "CountOutputParticles", + npart(), + Lambda(index_t p, npart_t & l_nout) { + if 
((tag(p) == ParticleTag::alive) and + (pld_i(p, pldi::spcCtr) % prtl_stride == 0)) { + l_nout += 1; + } + }, + nout); + out_indices = array_t { "out_indices", nout }; + array_t out_counter { "out_counter" }; + Kokkos::parallel_for( + "RecordOutputIndices", + npart(), + Lambda(index_t p) { + if ((tag(p) == ParticleTag::alive) and + (pld_i(p, pldi::spcCtr) % prtl_stride == 0)) { + const auto p_out = Kokkos::atomic_fetch_add(&out_counter(), 1); + out_indices(p_out) = p; + } + }); + } + + npart_t nout_offset = 0; + npart_t nout_total = nout; +#if defined(MPI_ENABLED) + auto nout_total_vec = std::vector(domains_total); + MPI_Allgather(&nout, + 1, + mpi::get_type(), + nout_total_vec.data(), + 1, + mpi::get_type(), + MPI_COMM_WORLD); + nout_total = 0; + for (auto r = 0; r < domains_total; ++r) { + if (r < domains_offset) { + nout_offset += nout_total_vec[r]; + } + nout_total += nout_total_vec[r]; + } +#endif // MPI_ENABLED + array_t buff_x1, buff_x2, buff_x3; array_t buff_ux1 { "ux1", nout }; array_t buff_ux2 { "ux2", nout }; @@ -123,42 +172,42 @@ namespace ntt { } if (nout > 0) { - // clang-format off - Kokkos::parallel_for( - "PrtlToPhys", - nout, - kernel::PrtlToPhys_kernel(prtl_stride, - buff_x1, buff_x2, buff_x3, - buff_ux1, buff_ux2, buff_ux3, - buff_wei, - buff_pldr, buff_pldi, - i1, i2, i3, - dx1, dx2, dx3, - ux1, ux2, ux3, - phi, weight, - pld_r, pld_i, - metric)); - // clang-format on - } - npart_t nout_offset = 0; - npart_t nout_total = nout; -#if defined(MPI_ENABLED) - auto nout_total_vec = std::vector(domains_total); - MPI_Allgather(&nout, - 1, - mpi::get_type(), - nout_total_vec.data(), - 1, - mpi::get_type(), - MPI_COMM_WORLD); - nout_total = 0; - for (auto r = 0; r < domains_total; ++r) { - if (r < domains_offset) { - nout_offset += nout_total_vec[r]; + if (!use_tracking()) { + // clang-format off + Kokkos::parallel_for( + "PrtlToPhys", + nout, + kernel::PrtlToPhys_kernel(prtl_stride, out_indices, + buff_x1, buff_x2, buff_x3, + buff_ux1, buff_ux2, 
buff_ux3, + buff_wei, + buff_pldr, buff_pldi, + i1, i2, i3, + dx1, dx2, dx3, + ux1, ux2, ux3, + phi, weight, + pld_r, pld_i, + metric)); + // clang-format on + } else { + // clang-format off + Kokkos::parallel_for( + "PrtlToPhys", + nout, + kernel::PrtlToPhys_kernel(prtl_stride, out_indices, + buff_x1, buff_x2, buff_x3, + buff_ux1, buff_ux2, buff_ux3, + buff_wei, + buff_pldr, buff_pldi, + i1, i2, i3, + dx1, dx2, dx3, + ux1, ux2, ux3, + phi, weight, + pld_r, pld_i, + metric)); + // clang-format on } - nout_total += nout_total_vec[r]; } -#endif // MPI_ENABLED out::Write1DArray(io, writer, fmt::format("pW_%d", index()), diff --git a/src/kernels/prtls_to_phys.hpp b/src/kernels/prtls_to_phys.hpp index dfe039e86..fbafbe00e 100644 --- a/src/kernels/prtls_to_phys.hpp +++ b/src/kernels/prtls_to_phys.hpp @@ -25,13 +25,14 @@ namespace kernel { using namespace ntt; - template + template class PrtlToPhys_kernel { static_assert(M::is_metric, "M must be a metric class"); static constexpr Dimension D = M::Dim; protected: const npart_t stride; + array_t out_indices; array_t buff_x1; array_t buff_x2; array_t buff_x3; @@ -52,6 +53,7 @@ namespace kernel { public: PrtlToPhys_kernel(npart_t stride, + array_t out_indices, array_t& buff_x1, array_t& buff_x2, array_t& buff_x3, @@ -76,6 +78,7 @@ namespace kernel { const array_t& pld_i, const M& metric) : stride { stride } + , out_indices { out_indices } , buff_x1 { buff_x1 } , buff_x2 { buff_x2 } , buff_x3 { buff_x3 } @@ -115,41 +118,44 @@ namespace kernel { } Inline void operator()(index_t p) const { - bufferX(p); - bufferU(p); - buff_wei(p) = weight(p * stride); - bufferPlds(p); + if constexpr (!T) { // no tracking enabled + bufferX(p * stride, p); + bufferU(p * stride, p); + buff_wei(p) = weight(p * stride); + bufferPlds(p * stride, p); + } else { + bufferX(out_indices(p), p); + bufferU(out_indices(p), p); + buff_wei(p) = weight(out_indices(p)); + bufferPlds(out_indices(p), p); + } } - Inline void bufferX(index_t& p) const { + Inline 
void bufferX(index_t& p_from, index_t& p_to) const { if constexpr ((D == Dim::_1D) || (D == Dim::_2D) || (D == Dim::_3D)) { - buff_x1(p) = metric.template convert<1, Crd::Cd, Crd::Ph>( - static_cast(i1(p * stride)) + - static_cast(dx1(p * stride))); + buff_x1(p_to) = metric.template convert<1, Crd::Cd, Crd::Ph>( + static_cast(i1(p_from)) + static_cast(dx1(p_from))); } if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { - buff_x2(p) = metric.template convert<2, Crd::Cd, Crd::Ph>( - static_cast(i2(p * stride)) + - static_cast(dx2(p * stride))); + buff_x2(p_to) = metric.template convert<2, Crd::Cd, Crd::Ph>( + static_cast(i2(p_from)) + static_cast(dx2(p_from))); } if constexpr ((D == Dim::_2D) && (M::CoordType != Coord::Cart)) { - buff_x3(p) = phi(p * stride); + buff_x3(p_to) = phi(p_from); } if constexpr (D == Dim::_3D) { - buff_x3(p) = metric.template convert<3, Crd::Cd, Crd::Ph>( - static_cast(i3(p * stride)) + - static_cast(dx3(p * stride))); + buff_x3(p_to) = metric.template convert<3, Crd::Cd, Crd::Ph>( + static_cast(i3(p_from)) + static_cast(dx3(p_from))); } } - Inline void bufferU(index_t& p) const { + Inline void bufferU(index_t& p_from, index_t& p_to) const { vec_t u_Phys { ZERO }; if constexpr (D == Dim::_1D) { if constexpr (M::CoordType == Coord::Cart) { metric.template transform_xyz( - { static_cast(i1(p * stride)) + - static_cast(dx1(p * stride)) }, - { ux1(p * stride), ux2(p * stride), ux3(p * stride) }, + { static_cast(i1(p_from)) + static_cast(dx1(p_from)) }, + { ux1(p_from), ux2(p_from), ux3(p_from) }, u_Phys); } else { raise::KernelError(HERE, "Unsupported coordinate system in 1D"); @@ -157,28 +163,22 @@ namespace kernel { } else if constexpr (D == Dim::_2D) { if constexpr (M::CoordType == Coord::Cart) { metric.template transform_xyz( - { static_cast(i1(p * stride)) + - static_cast(dx1(p * stride)), - static_cast(i2(p * stride)) + - static_cast(dx2(p * stride)) }, - { ux1(p * stride), ux2(p * stride), ux3(p * stride) }, + { static_cast(i1(p_from)) + 
static_cast(dx1(p_from)), + static_cast(i2(p_from)) + static_cast(dx2(p_from)) }, + { ux1(p_from), ux2(p_from), ux3(p_from) }, u_Phys); } else if constexpr (S == SimEngine::SRPIC) { metric.template transform_xyz( - { static_cast(i1(p * stride)) + - static_cast(dx1(p * stride)), - static_cast(i2(p * stride)) + - static_cast(dx2(p * stride)), - phi(p * stride) }, - { ux1(p * stride), ux2(p * stride), ux3(p * stride) }, + { static_cast(i1(p_from)) + static_cast(dx1(p_from)), + static_cast(i2(p_from)) + static_cast(dx2(p_from)), + phi(p_from) }, + { ux1(p_from), ux2(p_from), ux3(p_from) }, u_Phys); } else if constexpr (S == SimEngine::GRPIC) { metric.template transform( - { static_cast(i1(p * stride)) + - static_cast(dx1(p * stride)), - static_cast(i2(p * stride)) + - static_cast(dx2(p * stride)) }, - { ux1(p * stride), ux2(p * stride), ux3(p * stride) }, + { static_cast(i1(p_from)) + static_cast(dx1(p_from)), + static_cast(i2(p_from)) + static_cast(dx2(p_from)) }, + { ux1(p_from), ux2(p_from), ux3(p_from) }, u_Phys); } else { raise::KernelError(HERE, "Unrecognized simulation engine"); @@ -186,42 +186,36 @@ namespace kernel { } else if constexpr (D == Dim::_3D) { if constexpr (S == SimEngine::SRPIC) { metric.template transform_xyz( - { static_cast(i1(p * stride)) + - static_cast(dx1(p * stride)), - static_cast(i2(p * stride)) + - static_cast(dx2(p * stride)), - static_cast(i3(p * stride)) + - static_cast(dx3(p * stride)) }, - { ux1(p * stride), ux2(p * stride), ux3(p * stride) }, + { static_cast(i1(p_from)) + static_cast(dx1(p_from)), + static_cast(i2(p_from)) + static_cast(dx2(p_from)), + static_cast(i3(p_from)) + static_cast(dx3(p_from)) }, + { ux1(p_from), ux2(p_from), ux3(p_from) }, u_Phys); } else if constexpr (S == SimEngine::GRPIC) { metric.template transform( - { static_cast(i1(p * stride)) + - static_cast(dx1(p * stride)), - static_cast(i2(p * stride)) + - static_cast(dx2(p * stride)), - static_cast(i3(p * stride)) + - static_cast(dx3(p * stride)) }, - { ux1(p 
* stride), ux2(p * stride), ux3(p * stride) }, + { static_cast(i1(p_from)) + static_cast(dx1(p_from)), + static_cast(i2(p_from)) + static_cast(dx2(p_from)), + static_cast(i3(p_from)) + static_cast(dx3(p_from)) }, + { ux1(p_from), ux2(p_from), ux3(p_from) }, u_Phys); } else { raise::KernelError(HERE, "Unrecognized simulation engine"); } } - buff_ux1(p) = u_Phys[0]; - buff_ux2(p) = u_Phys[1]; - buff_ux3(p) = u_Phys[2]; + buff_ux1(p_to) = u_Phys[0]; + buff_ux2(p_to) = u_Phys[1]; + buff_ux3(p_to) = u_Phys[2]; } - Inline void bufferPlds(index_t& p) const { + Inline void bufferPlds(index_t& p_from, index_t& p_to) const { if (buff_pldr.extent(0) > 0) { for (auto pr { 0u }; pr < buff_pldr.extent(1); ++pr) { - buff_pldr(p, pr) = pld_r(p * stride, pr); + buff_pldr(p_to, pr) = pld_r(p_from, pr); } } if (buff_pldi.extent(0) > 0) { for (auto pi { 0u }; pi < buff_pldi.extent(1); ++pi) { - buff_pldi(p, pi) = pld_i(p * stride, pi); + buff_pldi(p_to, pi) = pld_i(p_from, pi); } } } From 417f03c009e082aae79d24fe4ffe07f17e197333 Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 30 Oct 2025 14:16:25 -0400 Subject: [PATCH 102/154] reduced 1d from 2d deposit --- src/kernels/currents_deposit.hpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index 88d6edc00..c37d1ea50 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -424,19 +424,12 @@ namespace kernel { real_t Wx1[O + 2]; real_t Wx23[O + 2]; - // first seperate - Wx1[0] = fS_x1[0]; - Wx23[0] = HALF * fS_x1[0]; - // last seperate - Wx1[O + 1] = -iS_x1[O + 1]; - Wx23[O + 1] = HALF * iS_x1[O + 1]; - // Calculate weight function #pragma unroll - for (int i = 1; i < O + 1; ++i) { + for (int i = 0; i < O + 2; ++i) { // Esirkepov 2001, Eq. 
38 for 1D case - Wx1[i] = fS_x1[i] - iS_x1[i - 1]; - Wx23[i] = HALF * (fS_x1[i] + iS_x1[i - 1]); + Wx1[i] = fS_x1[i] - iS_x1[i]; + Wx23[i] = HALF * (fS_x1[i] + iS_x1[i]); } // contribution within the shape function stencil From 65450c382b7ccae217457508d5812690a3b08a02 Mon Sep 17 00:00:00 2001 From: hayk Date: Mon, 3 Nov 2025 15:40:38 -0500 Subject: [PATCH 103/154] 11th order + printing fixed --- CMakeLists.txt | 2 +- cmake/styling.cmake | 31 +++++++++++++++++-------------- src/kernels/currents_deposit.hpp | 2 +- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 48e5689b7..260c3e877 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,7 +95,7 @@ if(${deposit} STREQUAL "zigzag") endif() set(shape_orders - "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" + "1;2;3;4;5;6;7;8;9;10;11" CACHE STRING "Shape orders") include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.cmake) diff --git a/cmake/styling.cmake b/cmake/styling.cmake index 5f1e4a7ad..daae19c65 100644 --- a/cmake/styling.cmake +++ b/cmake/styling.cmake @@ -140,12 +140,6 @@ function( else() padto("${rstring}" " " ${Padding} rstring) - set(new_choices ${Choices}) - foreach(ch IN LISTS new_choices) - string(REPLACE ${ch} "${Dim}${ch}${ColorReset}" new_choices - "${new_choices}") - endforeach() - set(Choices ${new_choices}) if(${Value} STREQUAL "ON") set(col ${Green}) elseif(${Value} STREQUAL "OFF") @@ -153,14 +147,23 @@ function( else() set(col ${Color}) endif() - if(NOT "${Value}" STREQUAL "") - string(REPLACE ${Value} "${col}${Value}${ColorReset}" Choices - "${Choices}") - endif() - if(NOT "${Default}" STREQUAL "") - string(REPLACE ${Default} "${Underline}${Default}${ColorReset}" Choices - "${Choices}") - endif() + set(new_choices "") + foreach(ch IN LISTS Choices) + set(elem "${ch}") + if((NOT "${Value}" STREQUAL "") AND (${ch} STREQUAL ${Value})) + set(elem "${col}${ch}${ColorReset}") + else() + set(elem "${Dim}${ch}${ColorReset}") + endif() + if((NOT "${Default}" 
STREQUAL "") AND (${ch} STREQUAL ${Default})) + set(elem "${Underline}${elem}${ColorReset}") + endif() + string(APPEND new_choices "${elem};") + endforeach() + string(LENGTH "${new_choices}" nlen) + math(EXPR nlen "${nlen} - 1") + string(SUBSTRING "${new_choices}" 0 ${nlen} new_choices) + set(Choices ${new_choices}) string(REPLACE ";" "/" Choices "${Choices}") string(APPEND rstring "${Choices}") endif() diff --git a/src/kernels/currents_deposit.hpp b/src/kernels/currents_deposit.hpp index c37d1ea50..0fbab19f5 100644 --- a/src/kernels/currents_deposit.hpp +++ b/src/kernels/currents_deposit.hpp @@ -402,7 +402,7 @@ namespace kernel { cur::jx3) += Fx3_2 * Wx1_2 * Wx2_2; } } - } else if constexpr ((O >= 1u) and (O <= 10u)) { + } else if constexpr ((O >= 1u) and (O <= 11u)) { // shape function in dim1 -> always required real_t iS_x1[O + 2], fS_x1[O + 2]; From 4b5044df4296efef5c8c54c41c5899c15225a03e Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Mon, 3 Nov 2025 21:22:44 -0600 Subject: [PATCH 104/154] added option for initial B-field for Bell instability --- pgens/streaming/bell.toml | 86 +++++++++++++++++++++++++++++++++++++++ pgens/streaming/pgen.hpp | 45 +++++++++++++++++++- 2 files changed, 129 insertions(+), 2 deletions(-) create mode 100644 pgens/streaming/bell.toml diff --git a/pgens/streaming/bell.toml b/pgens/streaming/bell.toml new file mode 100644 index 000000000..6671d19b2 --- /dev/null +++ b/pgens/streaming/bell.toml @@ -0,0 +1,86 @@ +[simulation] + name = "bell" + engine = "srpic" + runtime = 1000.0 + +[grid] + resolution = [1024, 1024] + extent = [[-10.0, 10.0], [-10.0, 10.0]] + + [grid.metric] + metric = "minkowski" + + [grid.boundaries] + fields = [["PERIODIC"], ["PERIODIC"]] + particles = [["PERIODIC"], ["PERIODIC"]] + +[scales] + larmor0 = 1.0 + skindepth0 = 1.0 + +[algorithms] + current_filters = 4 + + [algorithms.timestep] + CFL = 0.5 + +[particles] + ppc0 = 16.0 + + [[particles.species]] + label = "e-Px" + mass = 100.0 + charge = -1.0 + maxnpart = 
1e8 + + [[particles.species]] + label = "e+bg1" + mass = 1.0 + charge = 1.0 + maxnpart = 1e8 + pusher = "None" + + [[particles.species]] + label = "e-Mx" + mass = 1.0 + charge = -1.0 + maxnpart = 1e8 + + [[particles.species]] + label = "e+bg2" + mass = 1.0 + charge = 1.0 + maxnpart = 1e0 + pusher = "None" + +[setup] + # Drift 4-velocities for each species in all 3 directions + # @type: array of floats (length = nspec) + # @default: [ 0.0, ... ] + drifts_in_x = [0.1, 0.0, 0.1, 0.0] + drifts_in_y = [0.0, 0.0, 0.0, 0.0] + drifts_in_z = [0.0, 0.0, 0.0, 0.0] + # Pair-wise species densities in units of n0 + # @type: array of floats (length = nspec/2) + # @default: [ 2 / nspec, ... ] + densities = [0.5, 0.5] + # Species temperatures in units of m0 (c^2) + # @type: array of floats (length = nspec) + # @default: [ 0.0, ... ] + temperatures = [1e-4, 1e-4, 1e-4, 1e-4] + # Magnetic field + B0 = [1.0, 0.0, 0.0] + +[output] + interval_time = 10.0 + format = "BPFile" + + [output.fields] + quantities = ["N_1", "N_3", "E", "B"] + + [output.particles] + species = [1, 3] + stride = 10 + + [output.spectra] + enable = false diff --git a/pgens/streaming/pgen.hpp b/pgens/streaming/pgen.hpp index 1b8311b34..52f35aa4a 100644 --- a/pgens/streaming/pgen.hpp +++ b/pgens/streaming/pgen.hpp @@ -16,6 +16,46 @@ namespace user { using namespace ntt; + using prmvec_t = std::vector; + + template + struct InitFields { + + InitFields(const prmvec_t& B0) + : B1 { ZERO } + , B2 { ZERO } + , B3 { ZERO } { + + // normalize the magnetic field vector + real_t B_norm = ONE / math::sqrt(SQR(B0[0]) + SQR(B0[1]) + SQR(B0[2])); + + // make sure we don't divide by zero + if (std::isinf(B_norm)) { + B_norm = ZERO; + } + + // assigne normalized B-field components + B1 = B0[0] * B_norm; + B2 = B0[1] * B_norm; + B3 = B0[2] * B_norm; + } + + // magnetic field components + Inline auto bx1(const coord_t&) const -> real_t { + return B1; + } + + Inline auto bx2(const coord_t&) const -> real_t { + return B2; + } + + Inline 
auto bx3(const coord_t&) const -> real_t { + return B3; + } + + private: + real_t B1, B2, B3; + }; template struct PGen : public arch::ProblemGenerator { @@ -31,16 +71,17 @@ namespace user { using arch::ProblemGenerator::C; using arch::ProblemGenerator::params; - using prmvec_t = std::vector; - prmvec_t drifts_in_x, drifts_in_y, drifts_in_z; prmvec_t densities, temperatures; + // initial magnetic field + InitFields init_flds; inline PGen(const SimulationParams& p, const Metadomain& global_domain) : arch::ProblemGenerator { p } , drifts_in_x { p.template get("setup.drifts_in_x", prmvec_t {}) } , drifts_in_y { p.template get("setup.drifts_in_y", prmvec_t {}) } , drifts_in_z { p.template get("setup.drifts_in_z", prmvec_t {}) } + , init_flds { p.template get("setup.B0", prmvec_t {}) } , densities { p.template get("setup.densities", prmvec_t {}) } , temperatures { p.template get("setup.temperatures", prmvec_t {}) } { const auto nspec = p.template get("particles.nspec"); From dbc039f3f1c439bcca66de2de7bb6623bbfa4ac0 Mon Sep 17 00:00:00 2001 From: haykh Date: Fri, 7 Nov 2025 10:09:29 -0800 Subject: [PATCH 105/154] capture this explicitly --- src/framework/containers/particles_io.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/framework/containers/particles_io.cpp b/src/framework/containers/particles_io.cpp index 9131e596a..332c6e9d9 100644 --- a/src/framework/containers/particles_io.cpp +++ b/src/framework/containers/particles_io.cpp @@ -102,13 +102,15 @@ namespace ntt { if (!use_tracking()) { nout = npart() / prtl_stride; } else { - nout = 0u; + nout = 0u; + const auto tag_d = this->tag; + const auto pld_i_d = this->pld_i; Kokkos::parallel_reduce( "CountOutputParticles", npart(), Lambda(index_t p, npart_t & l_nout) { - if ((tag(p) == ParticleTag::alive) and - (pld_i(p, pldi::spcCtr) % prtl_stride == 0)) { + if ((tag_d(p) == ParticleTag::alive) and + (pld_i_d(p, pldi::spcCtr) % prtl_stride == 0)) { l_nout += 1; } }, @@ -119,8 
+121,8 @@ namespace ntt { "RecordOutputIndices", npart(), Lambda(index_t p) { - if ((tag(p) == ParticleTag::alive) and - (pld_i(p, pldi::spcCtr) % prtl_stride == 0)) { + if ((tag_d(p) == ParticleTag::alive) and + (pld_i_d(p, pldi::spcCtr) % prtl_stride == 0)) { const auto p_out = Kokkos::atomic_fetch_add(&out_counter(), 1); out_indices(p_out) = p; } @@ -288,7 +290,7 @@ namespace ntt { out::Write1DSubArray( io, writer, - fmt::format("pIDX_%d", pr, index()), + fmt::format("pIDX_%d", index()), buff_sub, nout, nout_total, From 2e367b0f99771f3d85848dbecad229aef60b5dfe Mon Sep 17 00:00:00 2001 From: haykh Date: Sat, 8 Nov 2025 09:19:34 -0800 Subject: [PATCH 106/154] tests adjusted --- cmake/tests.cmake | 7 -- src/kernels/tests/prtls_to_phys.cpp | 103 +++++++++++++++++----------- src/output/tests/writer-mpi.cpp | 31 +++++---- 3 files changed, 80 insertions(+), 61 deletions(-) diff --git a/cmake/tests.cmake b/cmake/tests.cmake index 189cc2cc4..0eb043f70 100644 --- a/cmake/tests.cmake +++ b/cmake/tests.cmake @@ -9,9 +9,6 @@ add_subdirectory(${SRC_DIR}/kernels ${CMAKE_CURRENT_BINARY_DIR}/kernels) add_subdirectory(${SRC_DIR}/archetypes ${CMAKE_CURRENT_BINARY_DIR}/archetypes) add_subdirectory(${SRC_DIR}/framework ${CMAKE_CURRENT_BINARY_DIR}/framework) add_subdirectory(${SRC_DIR}/output ${CMAKE_CURRENT_BINARY_DIR}/output) -if(${output}) - add_subdirectory(${SRC_DIR}/checkpoint ${CMAKE_CURRENT_BINARY_DIR}/checkpoint) -endif() set(TEST_DIRECTORIES "") @@ -27,10 +24,6 @@ endif() list(APPEND TEST_DIRECTORIES output) -if(${output}) - list(APPEND TEST_DIRECTORIES checkpoint) -endif() - foreach(test_dir IN LISTS TEST_DIRECTORIES) add_subdirectory(${SRC_DIR}/${test_dir}/tests ${CMAKE_CURRENT_BINARY_DIR}/${test_dir}/tests) diff --git a/src/kernels/tests/prtls_to_phys.cpp b/src/kernels/tests/prtls_to_phys.cpp index 962c21b5c..8f9d1760a 100644 --- a/src/kernels/tests/prtls_to_phys.cpp +++ b/src/kernels/tests/prtls_to_phys.cpp @@ -19,9 +19,7 @@ #include #include -#include #include 
-#include #include using namespace ntt; @@ -39,13 +37,15 @@ struct Checker { const array_t& ux2, const array_t& ux3, const array_t& weight, + const array_t& pld_i, const array_t& buff_x1, const array_t& buff_x2, const array_t& buff_x3, const array_t& buff_ux1, const array_t& buff_ux2, const array_t& buff_ux3, - const array_t& buff_wei) + const array_t& buff_wei, + const array_t& buff_pld_i) : metric { metric } , stride { stride } , i1 { i1 } @@ -57,13 +57,15 @@ struct Checker { , ux2 { ux2 } , ux3 { ux3 } , weight { weight } + , pld_i { pld_i } , buff_x1 { buff_x1 } , buff_x2 { buff_x2 } , buff_x3 { buff_x3 } , buff_ux1 { buff_ux1 } , buff_ux2 { buff_ux2 } , buff_ux3 { buff_ux3 } - , buff_wei { buff_wei } {} + , buff_wei { buff_wei } + , buff_pld_i { buff_pld_i } {} Inline void operator()(index_t p) const { std::size_t pold = p * stride; @@ -97,6 +99,9 @@ struct Checker { if (not cmp::AlmostEqual(weight(pold), buff_wei(p))) { raise::KernelError(HERE, "weight != buff_wei"); } + if (pld_i(pold, pldi::spcCtr) != buff_pld_i(p, pldi::spcCtr)) { + raise::KernelError(HERE, "weight != buff_wei"); + } } private: @@ -111,13 +116,15 @@ struct Checker { const array_t ux2; const array_t ux3; const array_t weight; - array_t buff_x1; - array_t buff_x2; - array_t buff_x3; - array_t buff_ux1; - array_t buff_ux2; - array_t buff_ux3; - array_t buff_wei; + const array_t pld_i; + const array_t buff_x1; + const array_t buff_x2; + const array_t buff_x3; + const array_t buff_ux1; + const array_t buff_ux2; + const array_t buff_ux3; + const array_t buff_wei; + const array_t buff_pld_i; }; template @@ -148,10 +155,14 @@ void testPrtl2PhysSR(const std::vector& res, array_t ux2 { "ux2", nprtl }; array_t ux3 { "ux3", nprtl }; array_t weight { "weight", nprtl }; + array_t pldr; + array_t pld_i { "pld_i", nprtl, 1 }; array_t i3; array_t dx3; + const std::size_t stride = 2; + array_t out_indices { "out_indices", nprtl / stride }; Kokkos::parallel_for( "Init", nprtl, @@ -166,40 +177,50 @@ void 
testPrtl2PhysSR(const std::vector& res, ux2(p) = ((real_t)(p) - (real_t)(nprtl) / 4) / (real_t)(9 * nprtl); ux3(p) = ((real_t)(p) - (real_t)(nprtl) / 2) / (real_t)(5 * nprtl); weight(p) = (real_t)(25) + (real_t)(p) / (real_t)(nprtl); + pld_i(p, pldi::spcCtr) = p; + if (p % stride == 0) { + out_indices(p / stride) = p; + } }); - const std::size_t stride = 2; - array_t buff_x1 { "buff_x1", nprtl / stride }; - array_t buff_x2 { "buff_x2", nprtl / stride }; - array_t buff_x3 { "buff_x3", nprtl / stride }; - array_t buff_ux1 { "buff_ux1", nprtl / stride }; - array_t buff_ux2 { "buff_ux2", nprtl / stride }; - array_t buff_ux3 { "buff_ux3", nprtl / stride }; - array_t buff_wei { "buff_wei", nprtl / stride }; + array_t buff_x1 { "buff_x1", nprtl / stride }; + array_t buff_x2 { "buff_x2", nprtl / stride }; + array_t buff_x3 { "buff_x3", nprtl / stride }; + array_t buff_ux1 { "buff_ux1", nprtl / stride }; + array_t buff_ux2 { "buff_ux2", nprtl / stride }; + array_t buff_ux3 { "buff_ux3", nprtl / stride }; + array_t buff_wei { "buff_wei", nprtl / stride }; + array_t buff_pldr; + array_t buff_pld_i { "pld_i", nprtl / stride, 1 }; Kokkos::parallel_for( "Init", Kokkos::RangePolicy(0, nprtl / stride), - kernel::PrtlToPhys_kernel(stride, - buff_x1, - buff_x2, - buff_x3, - buff_ux1, - buff_ux2, - buff_ux3, - buff_wei, - i1, - i2, - i3, - dx1, - dx2, - dx3, - ux1, - ux2, - ux3, - phi, - weight, - metric)); + kernel::PrtlToPhys_kernel(stride, + out_indices, + buff_x1, + buff_x2, + buff_x3, + buff_ux1, + buff_ux2, + buff_ux3, + buff_wei, + buff_pldr, + buff_pld_i, + i1, + i2, + i3, + dx1, + dx2, + dx3, + ux1, + ux2, + ux3, + phi, + weight, + pldr, + pld_i, + metric)); Kokkos::parallel_for("Check", nprtl / stride, Checker(metric, @@ -213,13 +234,15 @@ void testPrtl2PhysSR(const std::vector& res, ux2, ux3, weight, + pld_i, buff_x1, buff_x2, buff_x3, buff_ux1, buff_ux2, buff_ux3, - buff_wei)); + buff_wei, + buff_pld_i)); } auto main(int argc, char* argv[]) -> int { diff --git 
a/src/output/tests/writer-mpi.cpp b/src/output/tests/writer-mpi.cpp index bc95bbc81..dad981a40 100644 --- a/src/output/tests/writer-mpi.cpp +++ b/src/output/tests/writer-mpi.cpp @@ -17,8 +17,8 @@ void cleanup() { namespace fs = std::filesystem; - fs::path tempfile_path { "test.h5" }; - fs::remove(tempfile_path); + fs::path tempfile_path { "test.bp" }; + fs::remove_all(tempfile_path); } #define CEILDIV(a, b) \ @@ -61,7 +61,7 @@ auto main(int argc, char* argv[]) -> int { { // write auto writer = out::Writer(); - writer.init(&adios, "hdf5", "test", false); + writer.init(&adios, "BPFile", "test", false); writer.defineMeshLayout({ static_cast(mpi_size) * nx1 }, { static_cast(mpi_rank) * nx1 }, { nx1 }, @@ -91,16 +91,16 @@ auto main(int argc, char* argv[]) -> int { { // read adios2::IO io = adios.DeclareIO("read-test"); - io.SetEngine("HDF5"); - adios2::Engine reader = io.Open("test.h5", adios2::Mode::Read); - raise::ErrorIf(io.InquireAttribute("NGhosts").Data()[0] != 0, - "NGhosts is not correct", - HERE); - raise::ErrorIf(io.InquireAttribute("Dimension").Data()[0] != 1, - "Dimension is not correct", - HERE); - for (std::size_t step = 0; reader.BeginStep() == adios2::StepStatus::OK; - ++step) { + io.SetEngine("BPFile"); + adios2::Engine reader = io.Open("test.bp", adios2::Mode::Read); + for (auto step = 0u; reader.BeginStep() == adios2::StepStatus::OK; ++step) { + raise::ErrorIf(io.InquireAttribute("NGhosts").Data()[0] != 0, + "NGhosts is not correct", + HERE); + raise::ErrorIf(io.InquireAttribute("Dimension").Data()[0] != 1, + "Dimension is not correct", + HERE); + timestep_t step_read; simtime_t time_read; @@ -173,6 +173,7 @@ auto main(int argc, char* argv[]) -> int { } ++cntr; } + reader.EndStep(); } reader.Close(); } @@ -186,7 +187,9 @@ auto main(int argc, char* argv[]) -> int { Kokkos::finalize(); return 1; } - cleanup(); + CallOnce([]() { + cleanup(); + }); MPI_Finalize(); Kokkos::finalize(); return 0; From ca093bcc9a8e71397cf786d6367e2605d3d668f8 Mon Sep 17 
00:00:00 2001 From: haykh Date: Sat, 8 Nov 2025 09:19:43 -0800 Subject: [PATCH 107/154] bump dev/nix versions --- dev/nix/adios2.nix | 18 ++++++++++++++++-- dev/nix/kokkos.nix | 15 ++------------- dev/nix/shell.nix | 43 +++++++++++++++++++++---------------------- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/dev/nix/adios2.nix b/dev/nix/adios2.nix index fb574c302..a3890b788 100644 --- a/dev/nix/adios2.nix +++ b/dev/nix/adios2.nix @@ -43,9 +43,23 @@ stdenv.mkDerivation { ] ++ ( if hdf5 then - (if mpi then [ pkgs.hdf5-mpi ] else [ pkgs.hdf5-cpp ]) + ( + if mpi then + [ + pkgs.hdf5-mpi + ] + else + [ pkgs.hdf5-cpp ] + ) else - (if mpi then [ pkgs.mpi ] else [ ]) + ( + if mpi then + [ + pkgs.mpi + ] + else + [ ] + ) ); configurePhase = '' diff --git a/dev/nix/kokkos.nix b/dev/nix/kokkos.nix index 2f6ee6b99..7d86e665b 100644 --- a/dev/nix/kokkos.nix +++ b/dev/nix/kokkos.nix @@ -7,7 +7,7 @@ let name = "kokkos"; - pversion = "4.6.01"; + pversion = "4.7.01"; compilerPkgs = { "HIP" = with pkgs.rocmPackages; [ llvm.rocm-merged-llvm @@ -56,7 +56,7 @@ pkgs.stdenv.mkDerivation rec { src = pkgs.fetchgit { url = "https://github.com/kokkos/kokkos/"; rev = "${pversion}"; - sha256 = "sha256-+yszUbdHqhIkJZiGLZ9Ln4DYUosuJWKhO8FkbrY0/tY="; + sha256 = "sha256-MgphOsKE8umgYxVQZzex+elgvDDC09JaMCoU5YXaLco="; }; nativeBuildInputs = with pkgs; [ @@ -92,15 +92,4 @@ pkgs.stdenv.mkDerivation rec { installPhase = '' cmake --install build ''; - - # cmakeFlags = [ - # "-D CMAKE_CXX_STANDARD=17" - # "-D CMAKE_CXX_EXTENSIONS=OFF" - # "-D CMAKE_POSITION_INDEPENDENT_CODE=TRUE" - # "-D Kokkos_ARCH_${getArch { }}=ON" - # (if gpu != "none" then "-D Kokkos_ENABLE_${gpu}=ON" else "") - # "-D CMAKE_BUILD_TYPE=Release" - # ] ++ (cmakeExtraFlags.${gpu} src); - - # enableParallelBuilding = true; } diff --git a/dev/nix/shell.nix b/dev/nix/shell.nix index 33ae57095..0d4cc9119 100644 --- a/dev/nix/shell.nix +++ b/dev/nix/shell.nix @@ -5,7 +5,7 @@ }, gpu ? "NONE", arch ? "NATIVE", - hdf5 ? 
true, + hdf5 ? false, mpi ? false, }: @@ -62,27 +62,26 @@ pkgs.mkShell { pkgs.zlib ]); - shellHook = - '' - BLUE='\033[0;34m' - NC='\033[0m' + shellHook = '' + BLUE='\033[0;34m' + NC='\033[0m' - echo "following environment variables are set:" - '' - + pkgs.lib.concatStringsSep "" ( - pkgs.lib.mapAttrsToList ( - category: vars: - pkgs.lib.concatStringsSep "" ( - pkgs.lib.mapAttrsToList (name: value: '' - export ${name}=${value} - echo -e " ''\${BLUE}${name}''\${NC}=${value}" - '') vars.${gpuUpper} - ) - ) envVars - ) - + '' - echo "" - echo -e "${name} nix-shell activated" - ''; + echo "following environment variables are set:" + '' + + pkgs.lib.concatStringsSep "" ( + pkgs.lib.mapAttrsToList ( + category: vars: + pkgs.lib.concatStringsSep "" ( + pkgs.lib.mapAttrsToList (name: value: '' + export ${name}=${value} + echo -e " ''\${BLUE}${name}''\${NC}=${value}" + '') vars.${gpuUpper} + ) + ) envVars + ) + + '' + echo "" + echo -e "${name} nix-shell activated" + ''; } From 2028daaf8ca740d45b3bab0d04cd717bfc5c1d6f Mon Sep 17 00:00:00 2001 From: haykh Date: Sat, 8 Nov 2025 10:40:58 -0800 Subject: [PATCH 108/154] fix adios2 tag --- cmake/dependencies.cmake | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 1780bf97e..1f3ed3c6a 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -58,7 +58,12 @@ function(find_or_fetch_dependency package_name header_only mode) FetchContent_Declare( ${package_name} GIT_REPOSITORY ${${package_name}_REPOSITORY} - GIT_TAG 4.6.01) + GIT_TAG 4.7.01) + elseif(${package_name} STREQUAL "adios2") + FetchContent_Declare( + ${package_name} + GIT_REPOSITORY ${${package_name}_REPOSITORY} + GIT_TAG v2.10.2) else() FetchContent_Declare(${package_name} GIT_REPOSITORY ${${package_name}_REPOSITORY}) From bb93d4cd34a066253ef31a109342f2f20a901f9b Mon Sep 17 00:00:00 2001 From: haykh Date: Sun, 9 Nov 2025 19:05:27 -0500 Subject: [PATCH 109/154] archetypes simplified 
--- compile_pgens.sh | 41 ++ pgens/accretion/accretion.toml | 2 +- pgens/accretion/pgen.hpp | 43 +- pgens/magnetosphere/magnetosphere.toml | 5 +- pgens/reconnection/pgen.hpp | 53 +- pgens/shock/shock.toml | 32 +- pgens/turbulence/pgen.hpp | 128 +++-- pgens/turbulence/turbulence.toml | 8 +- pgens/wald/wald.toml | 2 +- src/archetypes/particle_injector.h | 737 +++++++------------------ src/archetypes/spatial_dist.h | 50 +- src/archetypes/utils.h | 20 +- src/engines/srpic.hpp | 192 ++++--- src/kernels/injectors.hpp | 28 +- 14 files changed, 548 insertions(+), 793 deletions(-) create mode 100755 compile_pgens.sh diff --git a/compile_pgens.sh b/compile_pgens.sh new file mode 100755 index 000000000..2d5bdc9eb --- /dev/null +++ b/compile_pgens.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +pgens=("magnetosphere" "reconnection" "turbulence" "shock" "streaming" "accretion" "wald") +flags=("OFFLINE=ON") + +for pgen in "${pgens[@]}"; do + echo "Compiling pgen: $pgen" + flags_d="-D pgen=${pgen} " + for flag in "${flags[@]}"; do + flags_d+="-D ${flag}" + done + + ( + cmake -B "builds/build-${pgen}" $flags_d && + cmake --build "builds/build-${pgen}" -j && + mkdir -p "temp/${pgen}" && + cp "builds/build-${pgen}/src/entity.xc" "temp/${pgen}/" && + cp "pgens/${pgen}/"*.toml "temp/${pgen}/" + ) || { + echo "Failed to compile pgen: $pgen" + exit 1 + } +done + +for pgen in "${pgens[@]}"; do + cd "temp/${pgen}" || { + echo "no temp directory for $pgen" + exit 1 + } + tomls=$(find . 
-type f -name "*.toml") + for toml in "${tomls[@]}"; do + ( + echo "Running pgen: $pgen with config $toml" && + ./entity.xc -input "$toml" && + cd ../../ + ) || { + echo "Failed to run $pgen with config $toml" + exit 1 + } + done +done diff --git a/pgens/accretion/accretion.toml b/pgens/accretion/accretion.toml index 1ec641430..9215da6b2 100644 --- a/pgens/accretion/accretion.toml +++ b/pgens/accretion/accretion.toml @@ -67,7 +67,7 @@ m_eps = 1.0 [output] - format = "hdf5" + format = "BPFile" [output.fields] interval_time = 1.0 diff --git a/pgens/accretion/pgen.hpp b/pgens/accretion/pgen.hpp index 54a607352..2266f4e18 100644 --- a/pgens/accretion/pgen.hpp +++ b/pgens/accretion/pgen.hpp @@ -43,8 +43,7 @@ namespace user { TWO * metric.spin() * g_00); } - Inline auto bx1(const coord_t& x_Ph) const - -> real_t { // at ( i , j + HALF ) + Inline auto bx1(const coord_t& x_Ph) const -> real_t { // at ( i , j + HALF ) coord_t xi { ZERO }, x0m { ZERO }, x0p { ZERO }; metric.template convert(x_Ph, xi); @@ -62,8 +61,7 @@ namespace user { } } - Inline auto bx2(const coord_t& x_Ph) const - -> real_t { // at ( i + HALF , j ) + Inline auto bx2(const coord_t& x_Ph) const -> real_t { // at ( i + HALF , j ) coord_t xi { ZERO }, x0m { ZERO }, x0p { ZERO }; metric.template convert(x_Ph, xi); @@ -242,16 +240,14 @@ namespace user { params, &local_domain); - const auto injector = - arch::NonUniformInjector( - energy_dist, - spatial_dist, - { 1, 2 }); - arch::InjectNonUniform(params, - local_domain, - injector, - 1.0, - true); + arch::InjectNonUniform( + params, + local_domain, + { 1, 2 }, + { energy_dist, energy_dist }, + spatial_dist, + ONE, + true); } void CustomPostStep(std::size_t, long double time, Domain& local_domain) { @@ -264,17 +260,14 @@ namespace user { multiplicity * nGJ, params, &local_domain); - - const auto injector = - arch::NonUniformInjector( - energy_dist, - spatial_dist, - { 1, 2 }); - arch::InjectNonUniform(params, - local_domain, - injector, - 1.0, - true); + 
arch::InjectNonUniform( + params, + local_domain, + { 1, 2 }, + { energy_dist, energy_dist }, + spatial_dist, + ONE, + true); } }; diff --git a/pgens/magnetosphere/magnetosphere.toml b/pgens/magnetosphere/magnetosphere.toml index 0eb9a03f3..4a8eca87e 100644 --- a/pgens/magnetosphere/magnetosphere.toml +++ b/pgens/magnetosphere/magnetosphere.toml @@ -63,7 +63,7 @@ period = 60.0 [output] - format = "hdf5" + format = "BPFile" [output.fields] interval_time = 0.1 @@ -74,6 +74,3 @@ [output.spectra] enable = false - -[diagnostics] - interval = 50 diff --git a/pgens/reconnection/pgen.hpp b/pgens/reconnection/pgen.hpp index 91aa46394..8af53c52e 100644 --- a/pgens/reconnection/pgen.hpp +++ b/pgens/reconnection/pgen.hpp @@ -4,7 +4,6 @@ #include "enums.h" #include "global.h" -#include "arch/directions.h" #include "arch/kokkos_aliases.h" #include "arch/traits.h" #include "utils/numeric.h" @@ -139,15 +138,6 @@ namespace user { }; // constant particle density for particle boundaries - template - struct ConstDens { - Inline auto operator()(const coord_t& x_Ph) const -> real_t { - return ONE; - } - }; - template - using spatial_dist_t = arch::Replenish>; - template struct PGen : public arch::ProblemGenerator { // compatibility traits for the problem generator @@ -224,21 +214,18 @@ namespace user { auto edist_cs = arch::Maxwellian(local_domain.mesh.metric, local_domain.random_pool, cs_temperature, - cs_drift_u, - in::x3, - false); + { ZERO, ZERO, cs_drift_u }); const auto sdist_cs = CurrentLayer(local_domain.mesh.metric, cs_width, cs_x, cs_y); - const auto inj_cs = arch::NonUniformInjector( - edist_cs, + arch::InjectNonUniform( + params, + local_domain, + { 1, 2 }, + { edist_cs, edist_cs }, sdist_cs, - { 1, 2 }); - arch::InjectNonUniform(params, - local_domain, - inj_cs, - cs_overdensity); + cs_overdensity); } void CustomPostStep(timestep_t, simtime_t time, Domain& domain) { @@ -303,28 +290,26 @@ namespace user { Kokkos::Experimental::contribute(domain.fields.buff, scatter_buff); } 
- const auto injector_up = arch::KeepConstantInjector( - energy_dist, - { 1, 2 }, - 0u, - probe_box_up); - const auto injector_down = arch::KeepConstantInjector( - energy_dist, - { 1, 2 }, + const auto replenish_sdist = arch::ReplenishUniform( + domain.mesh.metric, + domain.fields.buff, 0u, - probe_box_down); - - arch::InjectUniform( + ONE); + arch::InjectNonUniform( params, domain, - injector_up, + { 1, 2 }, + { energy_dist, energy_dist }, + replenish_sdist, ONE, params.template get("particles.use_weights"), inj_box_up); - arch::InjectUniform( + arch::InjectNonUniform( params, domain, - injector_down, + { 1, 2 }, + { energy_dist, energy_dist }, + replenish_sdist, ONE, params.template get("particles.use_weights"), inj_box_down); diff --git a/pgens/shock/shock.toml b/pgens/shock/shock.toml index 90678488a..4148d3613 100644 --- a/pgens/shock/shock.toml +++ b/pgens/shock/shock.toml @@ -4,7 +4,7 @@ runtime = 50.0 [simulation.domain] - decomposition = [1,-1] + decomposition = [1, -1] [grid] resolution = [4096, 128] @@ -14,10 +14,10 @@ metric = "minkowski" [grid.boundaries] - fields = [["CONDUCTOR", "MATCH"], ["PERIODIC"]] + fields = [["CONDUCTOR", "MATCH"], ["PERIODIC"]] particles = [["REFLECT", "ABSORB"], ["PERIODIC"]] - + [scales] larmor0 = 0.057735 skindepth0 = 0.01 @@ -44,27 +44,27 @@ maxnpart = 8e7 [setup] - drift_ux = 0.15 # speed towards the wall [c] - temperature = 0.001683 # temperature of maxwell distribution [kB T / (m_i c^2)] - temperature_ratio = 1.0 # temperature ratio of electrons to protons - Bmag = 1.0 # magnetic field strength as fraction of magnetisation - Btheta = 63.0 # magnetic field angle in the plane - Bphi = 0.0 # magnetic field angle out of plane - filling_fraction = 0.1 # fraction of the shock piston filled with plasma - injector_velocity = 0.2 # speed of injector [c] - injection_start = 0.0 # start time of moving injector - injection_frequency = 100 # inject particles every 100 timesteps + drift_ux = 0.15 # speed towards the wall [c] + 
temperature = 0.001683 # temperature of maxwell distribution [kB T / (m_i c^2)] + temperature_ratio = 1.0 # temperature ratio of electrons to protons + Bmag = 1.0 # magnetic field strength as fraction of magnetisation + Btheta = 63.0 # magnetic field angle in the plane + Bphi = 0.0 # magnetic field angle out of plane + filling_fraction = 0.1 # fraction of the shock piston filled with plasma + injector_velocity = 0.2 # speed of injector [c] + injection_start = 0.0 # start time of moving injector + injection_frequency = 100 # inject particles every 100 timesteps [output] interval_time = 0.1 - format = "hdf5" - + format = "BPFile" + [output.fields] quantities = ["N_1", "N_2", "B", "E"] [output.particles] enable = true - stride = 10 + stride = 10 [output.spectra] enable = false diff --git a/pgens/turbulence/pgen.hpp b/pgens/turbulence/pgen.hpp index 4c4a2c78e..e8001b090 100644 --- a/pgens/turbulence/pgen.hpp +++ b/pgens/turbulence/pgen.hpp @@ -11,6 +11,7 @@ #include "archetypes/energy_dist.h" #include "archetypes/particle_injector.h" #include "archetypes/problem_generator.h" +#include "archetypes/utils.h" #include "framework/domain/domain.h" #include "framework/domain/metadomain.h" @@ -38,22 +39,24 @@ namespace user { Inline auto bx1(const coord_t& x_Ph) const -> real_t { auto bx1_0 = ZERO; - if constexpr(D==Dim::_2D){ + if constexpr (D == Dim::_2D) { for (auto i = 0; i < n_modes; i++) { auto k_dot_r = k(0, i) * x_Ph[0] + k(1, i) * x_Ph[1]; bx1_0 -= TWO * k(1, i) * - (a_real(i) * math::sin(k_dot_r) + a_imag(i) * math::cos(k_dot_r)); + (a_real(i) * math::sin(k_dot_r) + + a_imag(i) * math::cos(k_dot_r)); bx1_0 -= TWO * k(1, i) * - (a_real_inv(i) * math::sin(k_dot_r) + - a_imag_inv(i) * math::cos(k_dot_r)); + (a_real_inv(i) * math::sin(k_dot_r) + + a_imag_inv(i) * math::cos(k_dot_r)); } return bx1_0; } - if constexpr (D==Dim::_3D){ - for (auto i = 0; i < n_modes; i++) { - auto k_dot_r = k(0, i) * x_Ph[0] + k(1, i) * x_Ph[1] + k(2, i) * x_Ph[2]; + if constexpr (D == 
Dim::_3D) { + for (auto i = 0; i < n_modes; i++) { + auto k_dot_r = k(0, i) * x_Ph[0] + k(1, i) * x_Ph[1] + k(2, i) * x_Ph[2]; bx1_0 -= TWO * k(1, i) * - (a_real(i) * math::sin(k_dot_r) + a_imag(i) * math::cos(k_dot_r)); + (a_real(i) * math::sin(k_dot_r) + + a_imag(i) * math::cos(k_dot_r)); } return bx1_0; } @@ -61,22 +64,24 @@ namespace user { Inline auto bx2(const coord_t& x_Ph) const -> real_t { auto bx2_0 = ZERO; - if constexpr (D==Dim::_2D){ + if constexpr (D == Dim::_2D) { for (auto i = 0; i < n_modes; i++) { auto k_dot_r = k(0, i) * x_Ph[0] + k(1, i) * x_Ph[1]; bx2_0 += TWO * k(0, i) * - (a_real(i) * math::sin(k_dot_r) + a_imag(i) * math::cos(k_dot_r)); + (a_real(i) * math::sin(k_dot_r) + + a_imag(i) * math::cos(k_dot_r)); bx2_0 += TWO * k(0, i) * (a_real_inv(i) * math::sin(k_dot_r) + a_imag_inv(i) * math::cos(k_dot_r)); } return bx2_0; } - if constexpr (D==Dim::_3D){ + if constexpr (D == Dim::_3D) { for (auto i = 0; i < n_modes; i++) { - auto k_dot_r = k(0, i) * x_Ph[0] + k(1, i) * x_Ph[1] + k(2, i) * x_Ph[2]; + auto k_dot_r = k(0, i) * x_Ph[0] + k(1, i) * x_Ph[1] + k(2, i) * x_Ph[2]; bx2_0 += TWO * k(0, i) * - (a_real(i) * math::sin(k_dot_r) + a_imag(i) * math::cos(k_dot_r)); + (a_real(i) * math::sin(k_dot_r) + + a_imag(i) * math::cos(k_dot_r)); } return bx2_0; } @@ -94,8 +99,8 @@ namespace user { std::size_t n_modes; }; - inline auto init_pool(int seed) -> unsigned int { - if (seed < 0) { + inline auto init_pool(unsigned int seed) -> unsigned int { + if (seed == 0) { unsigned int new_seed = static_cast(rand()); #if defined(MPI_ENABLED) MPI_Bcast(&new_seed, 1, MPI_UNSIGNED, MPI_ROOT_RANK, MPI_COMM_WORLD); @@ -117,14 +122,14 @@ namespace user { }; } else if constexpr (D == Dim::_3D) { return { - { 1, 0, 1 }, - { 0, 1, 1 }, - { -1, 0, 1 }, - { 0, -1, 1 }, - { 1, 0,-1 }, - { 0, 1,-1 }, - { -1, 0,-1 }, - { 0, -1,-1 } + { 1, 0, 1 }, + { 0, 1, 1 }, + { -1, 0, 1 }, + { 0, -1, 1 }, + { 1, 0, -1 }, + { 0, 1, -1 }, + { -1, 0, -1 }, + { 0, -1, -1 } }; } else { 
raise::Error("Invalid dimension", HERE); @@ -158,7 +163,7 @@ namespace user { , a_real_inv { "a_real_inv", n_modes } , a_imag_inv { "a_imag_inv", n_modes } , A0 { "A0", n_modes } { - // initializing random generator + // initializing random generator srand(seed); // initializing wavevectors auto k_host = Kokkos::create_mirror_view(k); @@ -191,11 +196,13 @@ namespace user { for (auto i = 0u; i < n_modes; i++) { auto k_perp = math::sqrt( k_host(0, i) * k_host(0, i) + k_host(1, i) * k_host(1, i)); - real_t phase = static_cast (rand()) / static_cast (RAND_MAX) * constant::TWO_PI; - A0_host(i) = dB / math::sqrt((real_t)n_modes) / k_perp * prefac; - a_real_host(i) = A0_host(i) * math::cos(phase); - a_imag_host(i) = A0_host(i) * math::sin(phase); - phase = static_cast (rand()) / static_cast (RAND_MAX) * constant::TWO_PI; + real_t phase = static_cast(rand()) / + static_cast(RAND_MAX) * constant::TWO_PI; + A0_host(i) = dB / math::sqrt((real_t)n_modes) / k_perp * prefac; + a_real_host(i) = A0_host(i) * math::cos(phase); + a_imag_host(i) = A0_host(i) * math::sin(phase); + phase = static_cast(rand()) / static_cast(RAND_MAX) * + constant::TWO_PI; a_imag_inv_host(i) = A0_host(i) * math::cos(phase); a_real_inv_host(i) = A0_host(i) * math::sin(phase); } @@ -270,7 +277,7 @@ namespace user { const std::vector> wavenumbers; const std::size_t n_modes; const real_t dB, Lx, Ly, Lz; - const int seed; + const unsigned int seed; public: const real_t omega_0, gamma_0; @@ -297,7 +304,7 @@ namespace user { const real_t temperature, dB, omega_0, gamma_0; const real_t Lx, Ly, Lz, escape_dist; - const int random_seed; + const unsigned int random_seed; std::vector> wavenumbers; random_number_pool_t random_pool; @@ -316,7 +323,7 @@ namespace user { , omega_0 { p.template get("setup.omega_0") } , gamma_0 { p.template get("setup.gamma_0") } , wavenumbers { init_wavenumbers() } - , random_seed { p.template get("setup.seed", -1) } + , random_seed { p.template get("setup.seed", 0) } , random_pool { 
init_pool(random_seed) } , Lx { global_domain.mesh().extent(in::x1).second - global_domain.mesh().extent(in::x1).first } @@ -325,32 +332,23 @@ namespace user { , Lz { global_domain.mesh().extent(in::x3).second - global_domain.mesh().extent(in::x3).first } , escape_dist { p.template get("setup.escape_dist", HALF * Lx) } - , ext_current { dB, omega_0, gamma_0, wavenumbers, init_pool(random_seed), Lx, Ly, Lz } + , ext_current { dB, omega_0, gamma_0, wavenumbers, init_pool(random_seed), + Lx, Ly, Lz } , init_flds { ext_current.k, ext_current.a_real, ext_current.a_imag, ext_current.a_real_inv, ext_current.a_imag_inv } {}; - inline void InitPrtls(Domain& local_domain) { - const auto energy_dist = arch::Maxwellian(local_domain.mesh.metric, - local_domain.random_pool, - temperature); - const auto spatial_dist = arch::UniformInjector( - energy_dist, - { 1, 2 }); - arch::InjectUniform>( - params, - local_domain, - spatial_dist, - ONE); - }; + inline void InitPrtls(Domain& domain) { + arch::InjectUniformMaxwellian(params, domain, ONE, temperature, { 1, 2 }); + } void CustomPostStep(timestep_t, simtime_t, Domain& domain) { - #if defined(MPI_ENABLED) - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - #endif +#if defined(MPI_ENABLED) + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); +#endif // update amplitudes of antenna const auto dt = params.template get("algorithms.timestep.dt"); const auto& ext_curr = ext_current; @@ -403,13 +401,13 @@ namespace user { domain.random_pool, temperature); for (const auto& sp : { 0, 1 }) { - if (domain.species[sp].npld() > 1) { - const auto& ux1 = domain.species[sp].ux1; - const auto& ux2 = domain.species[sp].ux2; - const auto& ux3 = domain.species[sp].ux3; - const auto& pld = domain.species[sp].pld; - const auto& tag = domain.species[sp].tag; - const auto L = escape_dist; + if (domain.species[sp].npld_r() > 1) { + const auto& ux1 = domain.species[sp].ux1; + const auto& ux2 = domain.species[sp].ux2; + const auto& ux3 = 
domain.species[sp].ux3; + const auto& pld_r = domain.species[sp].pld_r; + const auto& tag = domain.species[sp].tag; + const auto L = escape_dist; Kokkos::parallel_for( "UpdatePld", domain.species[sp].npart(), @@ -419,18 +417,18 @@ namespace user { } const auto gamma = math::sqrt( ONE + ux1(p) * ux1(p) + ux2(p) * ux2(p) + ux3(p) * ux3(p)); - pld(p, 0) += ux1(p) * dt / gamma; - pld(p, 1) += ux2(p) * dt / gamma; + pld_r(p, 0) += ux1(p) * dt / gamma; + pld_r(p, 1) += ux2(p) * dt / gamma; - if ((math::abs(pld(p, 0)) > L) or (math::abs(pld(p, 1)) > L)) { + if ((math::abs(pld_r(p, 0)) > L) or (math::abs(pld_r(p, 1)) > L)) { coord_t x_Ph { ZERO }; vec_t u_Mxw { ZERO }; energy_dist(x_Ph, u_Mxw); - ux1(p) = u_Mxw[0]; - ux2(p) = u_Mxw[1]; - ux3(p) = u_Mxw[2]; - pld(p, 0) = ZERO; - pld(p, 1) = ZERO; + ux1(p) = u_Mxw[0]; + ux2(p) = u_Mxw[1]; + ux3(p) = u_Mxw[2]; + pld_r(p, 0) = ZERO; + pld_r(p, 1) = ZERO; } }); } diff --git a/pgens/turbulence/turbulence.toml b/pgens/turbulence/turbulence.toml index 79cc641ef..a79bd07ad 100644 --- a/pgens/turbulence/turbulence.toml +++ b/pgens/turbulence/turbulence.toml @@ -42,12 +42,12 @@ [setup] temperature = 1e0 dB = 1.0 - omega_0 = 0.0156 - gamma_0 = 0.0078 - + omega_0 = 0.0156 + gamma_0 = 0.0078 + [output] - format = "hdf5" + format = "BPFile" interval_time = 12.0 [output.fields] diff --git a/pgens/wald/wald.toml b/pgens/wald/wald.toml index 2b05fbac9..7cdff5717 100644 --- a/pgens/wald/wald.toml +++ b/pgens/wald/wald.toml @@ -41,7 +41,7 @@ init_field = "wald" # or "vertical" [output] - format = "hdf5" + format = "BPFile" [output.fields] interval_time = 1.0 diff --git a/src/archetypes/particle_injector.h b/src/archetypes/particle_injector.h index f92f6defa..634c11d1a 100644 --- a/src/archetypes/particle_injector.h +++ b/src/archetypes/particle_injector.h @@ -2,9 +2,9 @@ * @file archetypes/particle_injector.h * @brief Particle injector routines and classes * @implements - * - arch::UniformInjector<> - * - arch::NonUniformInjector<> - * - 
arch::AtmosphereInjector<> + * - arch::DeduceRegion<> -> tuple, array_t> + * - arch::ComputeNumInject<> -> tuple, array_t> + * - arch::AtmosphereDensityProfile<> * - arch::InjectUniform<> -> void * - arch::InjectGlobally<> -> void * - arch::InjectNonUniform<> -> void @@ -22,13 +22,10 @@ #include "utils/error.h" #include "utils/numeric.h" -#include "archetypes/energy_dist.h" -#include "archetypes/spatial_dist.h" #include "framework/domain/domain.h" #include "framework/domain/metadomain.h" #include "kernels/injectors.hpp" -#include "kernels/utils.hpp" #include @@ -44,515 +41,173 @@ namespace arch { using namespace ntt; + /** + * @brief Deduces the region of injection in computational coordinates + * @param domain Domain object + * @param box Region to inject the particles in global coords + * @tparam S Simulation engine type + * @tparam M Metric type + * @return Tuple containing: + * - bool: whether the region intersects with the local domain + * - array_t: minimum coordinates of the region in computational coords + * - array_t: maximum coordinates of the region in computational coords + */ template - struct BaseInjector { - virtual auto DeduceRegion(const Domain& domain, - const boundaries_t& box) const - -> std::tuple, array_t> { - if (not domain.mesh.Intersects(box)) { - return { false, array_t {}, array_t {} }; - } - coord_t xCorner_min_Ph { ZERO }; - coord_t xCorner_max_Ph { ZERO }; - coord_t xCorner_min_Cd { ZERO }; - coord_t xCorner_max_Cd { ZERO }; - - for (auto d { 0u }; d < M::Dim; ++d) { - const auto local_xi_min = domain.mesh.extent(static_cast(d)).first; - const auto local_xi_max = domain.mesh.extent(static_cast(d)).second; - const auto extent_min = std::min(std::max(local_xi_min, box[d].first), - local_xi_max); - const auto extent_max = std::max(std::min(local_xi_max, box[d].second), - local_xi_min); - xCorner_min_Ph[d] = extent_min; - xCorner_max_Ph[d] = extent_max; - } - domain.mesh.metric.template convert(xCorner_min_Ph, - xCorner_min_Cd); - 
domain.mesh.metric.template convert(xCorner_max_Ph, - xCorner_max_Cd); - - array_t xi_min { "xi_min", M::Dim }, xi_max { "xi_max", M::Dim }; - - auto xi_min_h = Kokkos::create_mirror_view(xi_min); - auto xi_max_h = Kokkos::create_mirror_view(xi_max); - for (auto d { 0u }; d < M::Dim; ++d) { - xi_min_h(d) = xCorner_min_Cd[d]; - xi_max_h(d) = xCorner_max_Cd[d]; - } - Kokkos::deep_copy(xi_min, xi_min_h); - Kokkos::deep_copy(xi_max, xi_max_h); - - return { true, xi_min, xi_max }; + auto DeduceRegion(const Domain& domain, const boundaries_t& box) + -> std::tuple, array_t> { + if (not domain.mesh.Intersects(box)) { + return { false, array_t {}, array_t {} }; } - - virtual auto ComputeNumInject(const SimulationParams& params, - const Domain& domain, - real_t number_density, - const boundaries_t& box) const - -> std::tuple, array_t> { - const auto result = DeduceRegion(domain, box); - if (not std::get<0>(result)) { - return { false, (npart_t)0, array_t {}, array_t {} }; - } - const auto xi_min = std::get<1>(result); - const auto xi_max = std::get<2>(result); - auto xi_min_h = Kokkos::create_mirror_view(xi_min); - auto xi_max_h = Kokkos::create_mirror_view(xi_max); - Kokkos::deep_copy(xi_min_h, xi_min); - Kokkos::deep_copy(xi_max_h, xi_max); - - long double num_cells { 1.0 }; - for (auto d { 0u }; d < M::Dim; ++d) { - num_cells *= static_cast(xi_max_h(d)) - - static_cast(xi_min_h(d)); - } - - const auto ppc0 = params.template get("particles.ppc0"); - const auto nparticles = static_cast( - (long double)(ppc0 * number_density * 0.5) * num_cells); - - return { true, nparticles, xi_min, xi_max }; + coord_t xCorner_min_Ph { ZERO }; + coord_t xCorner_max_Ph { ZERO }; + coord_t xCorner_min_Cd { ZERO }; + coord_t xCorner_max_Cd { ZERO }; + + for (auto d { 0u }; d < M::Dim; ++d) { + const auto local_xi_min = domain.mesh.extent(static_cast(d)).first; + const auto local_xi_max = domain.mesh.extent(static_cast(d)).second; + const auto extent_min = std::min(std::max(local_xi_min, 
box[d].first), + local_xi_max); + const auto extent_max = std::max(std::min(local_xi_max, box[d].second), + local_xi_min); + xCorner_min_Ph[d] = extent_min; + xCorner_max_Ph[d] = extent_max; } - }; - - // template - // class ED> struct UniformInjector : BaseInjector { - // using energy_dist_t = ED; - // static_assert(M::is_metric, "M must be a metric class"); - // static_assert(energy_dist_t::is_energy_dist, - // "E must be an energy distribution class"); - // static constexpr bool is_uniform_injector { true }; - // static constexpr Dimension D { M::Dim }; - // static constexpr Coord C { M::CoordType }; - // - // const energy_dist_t energy_dist; - // const std::pair species; - // - // UniformInjector(const energy_dist_t& energy_dist, - // const std::pair& species) - // : energy_dist { energy_dist } - // , species { species } {} - // - // ~UniformInjector() = default; - // }; - - // template class ED> - // struct KeepConstantInjector : UniformInjector { - // using energy_dist_t = ED; - // using UniformInjector::D; - // using UniformInjector::C; - // - // const idx_t density_buff_idx; - // boundaries_t probe_box; - // - // KeepConstantInjector(const energy_dist_t& energy_dist, - // const std::pair& species, - // idx_t density_buff_idx, boundaries_t box = {}) - // : UniformInjector { energy_dist, species } - // , density_buff_idx { density_buff_idx } { - // for (auto d { 0u }; d < M::Dim; ++d) { - // if (d < box.size()) { - // probe_box.push_back({ box[d].first, box[d].second }); - // } else { - // probe_box.push_back(Range::All); - // } - // } - // } - // - // ~KeepConstantInjector() = default; - // - // auto ComputeAvgDensity(const SimulationParams& params, - // const Domain& domain) const -> real_t { - // const auto result = this->DeduceRegion(domain, probe_box); - // const auto should_probe = std::get<0>(result); - // if (not should_probe) { - // return ZERO; - // } - // const auto xi_min_arr = std::get<1>(result); - // const auto xi_max_arr = std::get<2>(result); 
- // - // tuple_t i_min { 0 }; - // tuple_t i_max { 0 }; - // - // auto xi_min_h = Kokkos::create_mirror_view(xi_min_arr); - // auto xi_max_h = Kokkos::create_mirror_view(xi_max_arr); - // Kokkos::deep_copy(xi_min_h, xi_min_arr); - // Kokkos::deep_copy(xi_max_h, xi_max_arr); - // - // ncells_t num_cells = 1u; - // for (auto d { 0u }; d < M::Dim; ++d) { - // i_min[d] = std::floor(xi_min_h(d)) + N_GHOSTS; - // i_max[d] = std::ceil(xi_max_h(d)) + N_GHOSTS; - // num_cells *= (i_max[d] - i_min[d]); - // } - // - // real_t dens { ZERO }; - // if (should_probe) { - // Kokkos::parallel_reduce( - // "AvgDensity", - // CreateRangePolicy(i_min, i_max), - // kernel::ComputeSum_kernel(domain.fields.buff, density_buff_idx), - // dens); - // } - // #if defined(MPI_ENABLED) - // real_t tot_dens { ZERO }; - // ncells_t tot_num_cells { 0 }; - // MPI_Allreduce(&dens, &tot_dens, 1, mpi::get_type(), MPI_SUM, MPI_COMM_WORLD); - // MPI_Allreduce(&num_cells, - // &tot_num_cells, - // 1, - // mpi::get_type(), - // MPI_SUM, - // MPI_COMM_WORLD); - // dens = tot_dens; - // num_cells = tot_num_cells; - // #endif - // if (num_cells > 0) { - // return dens / (real_t)(num_cells); - // } else { - // return ZERO; - // } - // } - // - // auto ComputeNumInject(const SimulationParams& params, - // const Domain& domain, - // real_t number_density, - // const boundaries_t& box) const - // -> std::tuple, array_t> override { - // const auto computed_avg_density = ComputeAvgDensity(params, domain); - // - // const auto result = this->DeduceRegion(domain, box); - // if (not std::get<0>(result)) { - // return { false, (npart_t)0, array_t {}, array_t {} }; - // } - // - // const auto xi_min = std::get<1>(result); - // const auto xi_max = std::get<2>(result); - // auto xi_min_h = Kokkos::create_mirror_view(xi_min); - // auto xi_max_h = Kokkos::create_mirror_view(xi_max); - // Kokkos::deep_copy(xi_min_h, xi_min); - // Kokkos::deep_copy(xi_max_h, xi_max); - // - // long double num_cells { 1.0 }; - // for (auto 
d { 0u }; d < M::Dim; ++d) { - // num_cells *= static_cast(xi_max_h(d)) - - // static_cast(xi_min_h(d)); - // } - // - // const auto ppc0 = params.template get("particles.ppc0"); - // npart_t nparticles { 0u }; - // if (number_density > computed_avg_density) { - // nparticles = static_cast( - // (long double)(ppc0 * (number_density - computed_avg_density) * 0.5) * - // num_cells); - // } - // - // return { nparticles != 0u, nparticles, xi_min, xi_max }; - // } - // }; - - template class ED, - template class SD> - struct NonUniformInjector { - using energy_dist_t = ED; - using spatial_dist_t = SD; - static_assert(M::is_metric, "M must be a metric class"); - static_assert(energy_dist_t::is_energy_dist, - "E must be an energy distribution class"); - static_assert(spatial_dist_t::is_spatial_dist, - "SD must be a spatial distribution class"); - static constexpr bool is_nonuniform_injector { true }; - static constexpr Dimension D { M::Dim }; - static constexpr Coord C { M::CoordType }; - - const energy_dist_t energy_dist; - const spatial_dist_t spatial_dist; - const std::pair species; - - NonUniformInjector(const energy_dist_t& energy_dist, - const spatial_dist_t& spatial_dist, - const std::pair& species) - : energy_dist { energy_dist } - , spatial_dist { spatial_dist } - , species { species } {} - - ~NonUniformInjector() = default; - }; - - template - struct AtmosphereInjector { - struct TargetDensityProfile { - const real_t nmax, height, xsurf, ds; - - TargetDensityProfile(real_t nmax, real_t height, real_t xsurf, real_t ds) - : nmax { nmax } - , height { height } - , xsurf { xsurf } - , ds { ds } {} - - Inline auto operator()(const coord_t& x_Ph) const -> real_t { - if constexpr ((O == in::x1) or - (O == in::x2 and (M::Dim == Dim::_2D or M::Dim == Dim::_3D)) or - (O == in::x3 and M::Dim == Dim::_3D)) { - const auto xi = x_Ph[static_cast(O)]; - if constexpr (P) { - // + direction - if (xi < xsurf - ds or xi >= xsurf) { - return ZERO; - } else { - if constexpr 
(M::CoordType == Coord::Cart) { - return nmax * math::exp(-(xsurf - xi) / height); - } else { - raise::KernelError( - HERE, - "Atmosphere in +x cannot be applied for non-cartesian"); - return ZERO; - } - } - } else { - // - direction - if (xi < xsurf or xi >= xsurf + ds) { - return ZERO; - } else { - if constexpr (M::CoordType == Coord::Cart) { - return nmax * math::exp(-(xi - xsurf) / height); - } else { - return nmax * math::exp(-(xsurf / height) * (ONE - (xsurf / xi))); - } - } - } - } else { - raise::KernelError(HERE, "Wrong direction"); - return ZERO; - } - } - }; - - using energy_dist_t = Maxwellian; - using spatial_dist_t = Replenish; - static_assert(M::is_metric, "M must be a metric class"); - static constexpr bool is_nonuniform_injector { true }; - static constexpr Dimension D { M::Dim }; - static constexpr Coord C { M::CoordType }; - - const energy_dist_t energy_dist; - const TargetDensityProfile target_density; - const spatial_dist_t spatial_dist; - const std::pair species; + domain.mesh.metric.template convert(xCorner_min_Ph, + xCorner_min_Cd); + domain.mesh.metric.template convert(xCorner_max_Ph, + xCorner_max_Cd); + + array_t xi_min { "xi_min", M::Dim }, xi_max { "xi_max", M::Dim }; + + auto xi_min_h = Kokkos::create_mirror_view(xi_min); + auto xi_max_h = Kokkos::create_mirror_view(xi_max); + for (auto d { 0u }; d < M::Dim; ++d) { + xi_min_h(d) = xCorner_min_Cd[d]; + xi_max_h(d) = xCorner_max_Cd[d]; + } + Kokkos::deep_copy(xi_min, xi_min_h); + Kokkos::deep_copy(xi_max, xi_max_h); - AtmosphereInjector(const M& metric, - const ndfield_t& density, - real_t nmax, - real_t height, - real_t xsurf, - real_t ds, - real_t T, - random_number_pool_t& pool, - const std::pair& species) - : energy_dist { metric, pool, T } - , target_density { nmax, height, xsurf, ds } - , spatial_dist { metric, density, 0, target_density, nmax } - , species { species } {} + return { true, xi_min, xi_max }; + } - ~AtmosphereInjector() = default; - }; + /** + * @brief Computes the 
number of particles to inject in a given region + * @param params Simulation parameters + * @param domain Domain object + * @param number_density Number density (in units of n0) + * @param box Region to inject the particles in global coords + * @tparam S Simulation engine type + * @tparam M Metric type + * @return Tuple containing: + * - bool: whether the region intersects with the local domain + * - npart_t: number of particles to inject + * - array_t: minimum coordinates of the region in computational coords + * - array_t: maximum coordinates of the region in computational coords + */ + template + auto ComputeNumInject(const SimulationParams& params, + const Domain& domain, + real_t number_density, + const boundaries_t& box) + -> std::tuple, array_t> { + const auto result = DeduceRegion(domain, box); + if (not std::get<0>(result)) { + return { false, (npart_t)0, array_t {}, array_t {} }; + } + const auto xi_min = std::get<1>(result); + const auto xi_max = std::get<2>(result); + auto xi_min_h = Kokkos::create_mirror_view(xi_min); + auto xi_max_h = Kokkos::create_mirror_view(xi_max); + Kokkos::deep_copy(xi_min_h, xi_min); + Kokkos::deep_copy(xi_max_h, xi_max); + + long double num_cells { 1.0 }; + for (auto d { 0u }; d < M::Dim; ++d) { + num_cells *= static_cast(xi_max_h(d)) - + static_cast(xi_min_h(d)); + } - template - struct MovingInjector { - struct TargetDensityProfile { - const real_t nmax, xinj, xdrift; + const auto ppc0 = params.template get("particles.ppc0"); + const auto nparticles = static_cast( + (long double)(ppc0 * number_density * 0.5) * num_cells); - TargetDensityProfile(real_t xinj, real_t xdrift, real_t nmax) - : xinj { xinj } - , xdrift { xdrift } - , nmax { nmax } {} + return { true, nparticles, xi_min, xi_max }; + } - Inline auto operator()(const coord_t& x_Ph) const -> real_t { - if constexpr ((O == in::x1) or - (O == in::x2 and (M::Dim == Dim::_2D or M::Dim == Dim::_3D)) or - (O == in::x3 and M::Dim == Dim::_3D)) { - const auto xi = 
x_Ph[static_cast(O)]; + template + struct AtmosphereDensityProfile { + const real_t nmax, height, xsurf, ds; + + AtmosphereDensityProfile(real_t nmax, real_t height, real_t xsurf, real_t ds) + : nmax { nmax } + , height { height } + , xsurf { xsurf } + , ds { ds } {} + + Inline auto operator()(const coord_t& x_Ph) const -> real_t { + if constexpr ((O == in::x1) or + (O == in::x2 and (D == Dim::_2D or D == Dim::_3D)) or + (O == in::x3 and D == Dim::_3D)) { + const auto xi = x_Ph[static_cast(O)]; + if constexpr (P) { // + direction - if (xi < xdrift or xi >= xinj) { + if (xi < xsurf - ds or xi >= xsurf) { return ZERO; } else { - if constexpr (M::CoordType == Coord::Cart) { - return nmax; + if constexpr (C == Coord::Cart) { + return nmax * math::exp(-(xsurf - xi) / height); } else { raise::KernelError( HERE, - "Moving injector in +x cannot be applied for non-cartesian"); + "Atmosphere in +x cannot be applied for non-cartesian"); return ZERO; } } } else { - raise::KernelError(HERE, "Wrong direction"); - return ZERO; + // - direction + if (xi < xsurf or xi >= xsurf + ds) { + return ZERO; + } else { + if constexpr (C == Coord::Cart) { + return nmax * math::exp(-(xi - xsurf) / height); + } else { + return nmax * math::exp(-(xsurf / height) * (ONE - (xsurf / xi))); + } + } } + } else { + raise::KernelError(HERE, "Wrong direction"); + return ZERO; } - }; - - using energy_dist_t = Maxwellian; - using spatial_dist_t = Replenish; - static_assert(M::is_metric, "M must be a metric class"); - static constexpr bool is_nonuniform_injector { true }; - static constexpr Dimension D { M::Dim }; - static constexpr Coord C { M::CoordType }; - - const energy_dist_t energy_dist; - const TargetDensityProfile target_density; - const spatial_dist_t spatial_dist; - const std::pair species; - - MovingInjector(const M& metric, - const ndfield_t& density, - const energy_dist_t& energy_dist, - real_t xinj, - real_t xdrift, - real_t nmax, - const std::pair& species) - : energy_dist { energy_dist } 
- , target_density { xinj, xdrift, nmax } - , spatial_dist { metric, density, 0, target_density, nmax } - , species { species } {} - - ~MovingInjector() = default; - }; - - // /** - // * @brief Injects uniform number density of particles everywhere in the domain - // * @param domain Domain object - // * @param injector Uniform injector object - // * @param number_density Total number density (in units of n0) - // * @param use_weights Use weights - // * @param box Region to inject the particles in global coords - // * @tparam S Simulation engine type - // * @tparam M Metric type - // * @tparam I Injector type - // */ - // template - // inline void InjectUniform(const SimulationParams& params, - // Domain& domain, - // const I& injector, - // real_t number_density, - // bool use_weights = false, - // const boundaries_t& box = {}) { - // static_assert(M::is_metric, "M must be a metric class"); - // static_assert(I::is_uniform_injector, "I must be a uniform injector class"); - // raise::ErrorIf((M::CoordType != Coord::Cart) && (not use_weights), - // "Weights must be used for non-Cartesian coordinates", - // HERE); - // raise::ErrorIf((M::CoordType == Coord::Cart) && use_weights, - // "Weights should not be used for Cartesian coordinates", - // HERE); - // raise::ErrorIf(params.template get("particles.use_weights") != use_weights, - // "Weights must be enabled from the input file to use them in " - // "the injector", - // HERE); - // if (domain.species[injector.species.first - 1].charge() + - // domain.species[injector.species.second - 1].charge() != - // 0.0f) { - // raise::Warning("Total charge of the injected species is non-zero", HERE); - // } - // - // { - // boundaries_t nonempty_box; - // for (auto d { 0u }; d < M::Dim; ++d) { - // if (d < box.size()) { - // nonempty_box.push_back({ box[d].first, box[d].second }); - // } else { - // nonempty_box.push_back(Range::All); - // } - // } - // const auto result = injector.ComputeNumInject(params, - // domain, - // 
number_density, - // nonempty_box); - // if (not std::get<0>(result)) { - // return; - // } - // const auto nparticles = std::get<1>(result); - // const auto xi_min = std::get<2>(result); - // const auto xi_max = std::get<3>(result); - // - // Kokkos::parallel_for( - // "InjectUniform", - // nparticles, - // kernel::UniformInjector_kernel( - // injector.species.first, - // injector.species.second, - // domain.species[injector.species.first - 1], - // domain.species[injector.species.second - 1], - // domain.species[injector.species.first - 1].npart(), - // domain.species[injector.species.second - 1].npart(), - // domain.mesh.metric, - // xi_min, - // xi_max, - // injector.energy_dist, - // ONE / params.template get("scales.V0"), - // domain.random_pool)); - // domain.species[injector.species.first - 1].set_npart( - // domain.species[injector.species.first - 1].npart() + nparticles); - // domain.species[injector.species.second - 1].set_npart( - // domain.species[injector.species.second - 1].npart() + nparticles); - // } - // } - // - // namespace experimental { - - template class ED1, - template class ED2> - struct UniformInjector : BaseInjector { - using energy_dist_1_t = ED1; - using energy_dist_2_t = ED2; - static_assert(M::is_metric, "M must be a metric class"); - static_assert(energy_dist_1_t::is_energy_dist, - "ED1 must be an energy distribution class"); - static_assert(energy_dist_2_t::is_energy_dist, - "ED2 must be an energy distribution class"); - static constexpr bool is_uniform_injector { true }; - static constexpr Dimension D { M::Dim }; - static constexpr Coord C { M::CoordType }; - - const energy_dist_1_t energy_dist_1; - const energy_dist_2_t energy_dist_2; - const std::pair species; - - UniformInjector(const energy_dist_1_t& energy_dist_1, - const energy_dist_2_t& energy_dist_2, - const std::pair& species) - : energy_dist_1 { energy_dist_1 } - , energy_dist_2 { energy_dist_2 } - , species { species } {} - - ~UniformInjector() = default; + } }; /** * 
@brief Injects uniform number density of particles everywhere in the domain * @param domain Domain object - * @param injector Uniform injector object + * @param species Pair of species indices + * @param energy_dists Pair of energy distribution objects * @param number_density Total number density (in units of n0) * @param use_weights Use weights * @param box Region to inject the particles in global coords * @tparam S Simulation engine type * @tparam M Metric type - * @tparam I Injector type + * @tparam ED1 Energy distribution type for species 1 + * @tparam ED2 Energy distribution type for species 2 */ - template - inline void InjectUniform(const SimulationParams& params, - Domain& domain, - const I& injector, - real_t number_density, + template + inline void InjectUniform(const SimulationParams& params, + Domain& domain, + const std::pair& species, + const std::pair& energy_dists, + real_t number_density, bool use_weights = false, const boundaries_t& box = {}) { static_assert(M::is_metric, "M must be a metric class"); - static_assert(I::is_uniform_injector, "I must be a uniform injector class"); + static_assert(ED1::is_energy_dist, "ED1 must be an energy distribution class"); + static_assert(ED2::is_energy_dist, "ED2 must be an energy distribution class"); raise::ErrorIf((M::CoordType != Coord::Cart) && (not use_weights), "Weights must be used for non-Cartesian coordinates", HERE); @@ -563,8 +218,8 @@ namespace arch { "Weights must be enabled from the input file to use them in " "the injector", HERE); - if (domain.species[injector.species.first - 1].charge() + - domain.species[injector.species.second - 1].charge() != + if (domain.species[species.first - 1].charge() + + domain.species[species.second - 1].charge() != 0.0f) { raise::Warning("Total charge of the injected species is non-zero", HERE); } @@ -578,10 +233,7 @@ namespace arch { nonempty_box.push_back(Range::All); } } - const auto result = injector.ComputeNumInject(params, - domain, - number_density, - 
nonempty_box); + const auto result = ComputeNumInject(params, domain, number_density, nonempty_box); if (not std::get<0>(result)) { return; } @@ -589,34 +241,31 @@ namespace arch { const auto xi_min = std::get<2>(result); const auto xi_max = std::get<3>(result); - Kokkos::parallel_for( - "InjectUniform", - nparticles, - kernel::UniformInjector_kernel( - injector.species.first, - injector.species.second, - domain.species[injector.species.first - 1], - domain.species[injector.species.second - 1], - nparticles, - domain.index(), - domain.species[injector.species.first - 1].npart(), - domain.species[injector.species.second - 1].npart(), - domain.mesh.metric, - xi_min, - xi_max, - injector.energy_dist_1, - injector.energy_dist_2, - ONE / params.template get("scales.V0"), - domain.random_pool)); - domain.species[injector.species.first - 1].set_npart( - domain.species[injector.species.first - 1].npart() + nparticles); - domain.species[injector.species.second - 1].set_npart( - domain.species[injector.species.second - 1].npart() + nparticles); + Kokkos::parallel_for("InjectUniform", + nparticles, + kernel::UniformInjector_kernel( + species.first, + species.second, + domain.species[species.first - 1], + domain.species[species.second - 1], + nparticles, + domain.index(), + domain.species[species.first - 1].npart(), + domain.species[species.second - 1].npart(), + domain.mesh.metric, + xi_min, + xi_max, + energy_dists.first, + energy_dists.second, + ONE / params.template get("scales.V0"), + domain.random_pool)); + domain.species[species.first - 1].set_npart( + domain.species[species.first - 1].npart() + nparticles); + domain.species[species.second - 1].set_npart( + domain.species[species.second - 1].npart() + nparticles); } } - // } // namespace experimental - /** * @brief Injects particles from a globally-defined map * @note very inefficient, should only be used for debug purposes @@ -651,21 +300,31 @@ namespace arch { * @brief Injects particles based on spatial distribution 
function * @param params Simulation parameters * @param domain Local domain object - * @param injector Non-uniform injector object + * @param species Pair of species indices + * @param energy_dists Pair of energy distribution objects + * @param spatial_dist Spatial distribution object * @param number_density Total number density (in units of n0) * @param use_weights Use weights * @param box Region to inject the particles in + * @tparam S Simulation engine type + * @tparam M Metric type + * @tparam ED1 Energy distribution type for species 1 + * @tparam ED2 Energy distribution type for species 2 + * @tparam SD Spatial distribution type */ - template - inline void InjectNonUniform(const SimulationParams& params, - Domain& domain, - const I& injector, + template + inline void InjectNonUniform(const SimulationParams& params, + Domain& domain, + const std::pair& species, + const std::pair& energy_dists, + const SD& spatial_dist, real_t number_density, bool use_weights = false, const boundaries_t& box = {}) { static_assert(M::is_metric, "M must be a metric class"); - static_assert(I::is_nonuniform_injector, - "I must be a nonuniform injector class"); + static_assert(ED1::is_energy_dist, "ED1 must be an energy distribution class"); + static_assert(ED2::is_energy_dist, "ED2 must be an energy distribution class"); + static_assert(SD::is_spatial_dist, "SD must be a spatial distribution class"); raise::ErrorIf((M::CoordType != Coord::Cart) && (not use_weights), "Weights must be used for non-Cartesian coordinates", HERE); @@ -680,8 +339,8 @@ namespace arch { not params.template get("particles.use_weights") and use_weights, "Weights are not enabled in the input but enabled in the injector", HERE); - if (domain.species[injector.species.first - 1].charge() + - domain.species[injector.species.second - 1].charge() != + if (domain.species[species.first - 1].charge() + + domain.species[species.second - 1].charge() != 0.0f) { raise::Warning("Total charge of the injected species is 
non-zero", HERE); } @@ -707,28 +366,28 @@ namespace arch { } const auto ppc = number_density * params.template get("particles.ppc0") * HALF; - auto injector_kernel = - kernel::NonUniformInjector_kernel( - ppc, - injector.species.first, - injector.species.second, - domain.species[injector.species.first - 1], - domain.species[injector.species.second - 1], - domain.species[injector.species.first - 1].npart(), - domain.species[injector.species.second - 1].npart(), - domain.mesh.metric, - injector.energy_dist, - injector.spatial_dist, - ONE / params.template get("scales.V0"), - domain.random_pool); + auto injector_kernel = kernel::NonUniformInjector_kernel( + ppc, + species.first, + species.second, + domain.species[species.first - 1], + domain.species[species.second - 1], + domain.species[species.first - 1].npart(), + domain.species[species.second - 1].npart(), + domain.mesh.metric, + energy_dists.first, + energy_dists.second, + spatial_dist, + ONE / params.template get("scales.V0"), + domain.random_pool); Kokkos::parallel_for("InjectNonUniformNumberDensity", cell_range, injector_kernel); const auto n_inj = injector_kernel.number_injected(); - domain.species[injector.species.first - 1].set_npart( - domain.species[injector.species.first - 1].npart() + n_inj); - domain.species[injector.species.second - 1].set_npart( - domain.species[injector.species.second - 1].npart() + n_inj); + domain.species[species.first - 1].set_npart( + domain.species[species.first - 1].npart() + n_inj); + domain.species[species.second - 1].set_npart( + domain.species[species.second - 1].npart() + n_inj); } } diff --git a/src/archetypes/spatial_dist.h b/src/archetypes/spatial_dist.h index 55c84ddf2..68477208c 100644 --- a/src/archetypes/spatial_dist.h +++ b/src/archetypes/spatial_dist.h @@ -5,6 +5,7 @@ * - arch::SpatialDistribution<> * - arch::Uniform<> : arch::SpatialDistribution<> * - arch::Replenish<> : arch::SpatialDistribution<> + * - arch::ReplenishUniform<> : arch::SpatialDistribution<> * 
@namespace * - arch:: * @note @@ -45,17 +46,17 @@ namespace arch { } }; - template + template struct Replenish : public SpatialDistribution { using SpatialDistribution::metric; - const ndfield_t density; + const ndfield_t density; const idx_t idx; const T target_density; const real_t target_max_density; Replenish(const M& metric, - const ndfield_t& density, + const ndfield_t& density, idx_t idx, const T& target_density, real_t target_max_density) @@ -92,6 +93,49 @@ namespace arch { } }; + template + struct ReplenishUniform : public SpatialDistribution { + using SpatialDistribution::metric; + const ndfield_t density; + const idx_t idx; + + const real_t target_density; + + ReplenishUniform(const M& metric, + const ndfield_t& density, + idx_t idx, + real_t target_density) + : SpatialDistribution { metric } + , density { density } + , idx { idx } + , target_density { target_density } {} + + Inline auto operator()(const coord_t& x_Ph) const -> real_t { + coord_t x_Cd { ZERO }; + metric.template convert(x_Ph, x_Cd); + real_t dens { ZERO }; + if constexpr (M::Dim == Dim::_1D) { + dens = density(static_cast(x_Cd[0]) + N_GHOSTS, idx); + } else if constexpr (M::Dim == Dim::_2D) { + dens = density(static_cast(x_Cd[0]) + N_GHOSTS, + static_cast(x_Cd[1]) + N_GHOSTS, + idx); + } else if constexpr (M::Dim == Dim::_3D) { + dens = density(static_cast(x_Cd[0]) + N_GHOSTS, + static_cast(x_Cd[1]) + N_GHOSTS, + static_cast(x_Cd[2]) + N_GHOSTS, + idx); + } else { + raise::KernelError(HERE, "Invalid dimension"); + } + if (0.9 * target_density > dens) { + return (target_density - dens) / target_density; + } else { + return ZERO; + } + } + }; + } // namespace arch #endif // ARCHETYPES_SPATIAL_DIST_HPP diff --git a/src/archetypes/utils.h b/src/archetypes/utils.h index 7a5296771..3447558fb 100644 --- a/src/archetypes/utils.h +++ b/src/archetypes/utils.h @@ -17,6 +17,7 @@ #include "archetypes/energy_dist.h" #include "archetypes/particle_injector.h" #include "framework/domain/domain.h" 
+#include "framework/parameters.h" #include @@ -60,17 +61,14 @@ namespace arch { temperature_2, drift_four_vels.second); - const auto injector = arch::UniformInjector( - maxwellian_1, - maxwellian_2, - species); - - arch::InjectUniform(params, - domain, - injector, - tot_number_density, - use_weights, - box); + arch::InjectUniform( + params, + domain, + species, + { maxwellian_1, maxwellian_2 }, + tot_number_density, + use_weights, + box); } /** diff --git a/src/engines/srpic.hpp b/src/engines/srpic.hpp index a64c44f22..0b85aa230 100644 --- a/src/engines/srpic.hpp +++ b/src/engines/srpic.hpp @@ -23,7 +23,9 @@ #include "utils/timer.h" #include "utils/toml.h" +#include "archetypes/energy_dist.h" #include "archetypes/particle_injector.h" +#include "archetypes/spatial_dist.h" #include "framework/domain/domain.h" #include "framework/parameters.h" @@ -1268,119 +1270,153 @@ namespace ntt { m_metadomain.SynchronizeFields(domain, Comm::Bckp, { 0, 1 }); } + const auto maxwellian = arch::Maxwellian { domain.mesh.metric, + domain.random_pool, + temp }; + if (dim == in::x1) { if (sign > 0) { - const auto atm_injector = - arch::AtmosphereInjector { - domain.mesh.metric, - domain.fields.bckp, + auto target_density = + arch::AtmosphereDensityProfile { nmax, height, x_surf, - ds, - temp, - domain.random_pool, - species + ds }; - arch::InjectNonUniform(m_params, - domain, - atm_injector, - nmax, - use_weights); + const auto spatial_dist = arch::Replenish { + domain.mesh.metric, + domain.fields.bckp, + 0, + target_density, + nmax + }; + arch::InjectNonUniform( + m_params, + domain, + { species.first, species.second }, + { maxwellian, maxwellian }, + spatial_dist, + nmax, + use_weights); } else { - const auto atm_injector = - arch::AtmosphereInjector { - domain.mesh.metric, - domain.fields.bckp, + auto target_density = + arch::AtmosphereDensityProfile { nmax, height, x_surf, - ds, - temp, - domain.random_pool, - species + ds }; - arch::InjectNonUniform(m_params, - domain, - 
atm_injector, - nmax, - use_weights); + const auto spatial_dist = arch::Replenish { + domain.mesh.metric, + domain.fields.bckp, + 0, + target_density, + nmax + }; + arch::InjectNonUniform( + m_params, + domain, + { species.first, species.second }, + { maxwellian, maxwellian }, + spatial_dist, + nmax, + use_weights); } } else if (dim == in::x2) { if (sign > 0) { - const auto atm_injector = - arch::AtmosphereInjector { - domain.mesh.metric, - domain.fields.bckp, + auto target_density = + arch::AtmosphereDensityProfile { nmax, height, x_surf, - ds, - temp, - domain.random_pool, - species + ds }; - arch::InjectNonUniform(m_params, - domain, - atm_injector, - nmax, - use_weights); + const auto spatial_dist = arch::Replenish { + domain.mesh.metric, + domain.fields.bckp, + 0, + target_density, + nmax + }; + arch::InjectNonUniform( + m_params, + domain, + { species.first, species.second }, + { maxwellian, maxwellian }, + spatial_dist, + nmax, + use_weights); } else { - const auto atm_injector = - arch::AtmosphereInjector { - domain.mesh.metric, - domain.fields.bckp, + auto target_density = + arch::AtmosphereDensityProfile { nmax, height, x_surf, - ds, - temp, - domain.random_pool, - species + ds }; - arch::InjectNonUniform(m_params, - domain, - atm_injector, - nmax, - use_weights); + const auto spatial_dist = arch::Replenish { + domain.mesh.metric, + domain.fields.bckp, + 0, + target_density, + nmax + }; + arch::InjectNonUniform( + m_params, + domain, + { species.first, species.second }, + { maxwellian, maxwellian }, + spatial_dist, + nmax, + use_weights); } } else if (dim == in::x3) { if (sign > 0) { - const auto atm_injector = - arch::AtmosphereInjector { - domain.mesh.metric, - domain.fields.bckp, + auto target_density = + arch::AtmosphereDensityProfile { nmax, height, x_surf, - ds, - temp, - domain.random_pool, - species + ds }; - arch::InjectNonUniform(m_params, - domain, - atm_injector, - nmax, - use_weights); + const auto spatial_dist = arch::Replenish { + 
domain.mesh.metric, + domain.fields.bckp, + 0, + target_density, + nmax + }; + arch::InjectNonUniform( + m_params, + domain, + { species.first, species.second }, + { maxwellian, maxwellian }, + spatial_dist, + nmax, + use_weights); } else { - const auto atm_injector = - arch::AtmosphereInjector { - domain.mesh.metric, - domain.fields.bckp, + auto target_density = + arch::AtmosphereDensityProfile { nmax, height, x_surf, - ds, - temp, - domain.random_pool, - species + ds }; - arch::InjectNonUniform(m_params, - domain, - atm_injector, - nmax, - use_weights); + const auto spatial_dist = arch::Replenish { + domain.mesh.metric, + domain.fields.bckp, + 0, + target_density, + nmax + }; + arch::InjectNonUniform( + m_params, + domain, + { species.first, species.second }, + { maxwellian, maxwellian }, + spatial_dist, + nmax, + use_weights); } } else { raise::Error("Invalid dimension", HERE); diff --git a/src/kernels/injectors.hpp b/src/kernels/injectors.hpp index 69642e2fd..5169b3032 100644 --- a/src/kernels/injectors.hpp +++ b/src/kernels/injectors.hpp @@ -529,11 +529,12 @@ namespace kernel { } }; // struct GlobalInjector_kernel - template + template struct NonUniformInjector_kernel { - static_assert(ED::is_energy_dist, "ED must be an energy distribution class"); - static_assert(SD::is_spatial_dist, "SD must be a spatial distribution class"); static_assert(M::is_metric, "M must be a metric class"); + static_assert(ED1::is_energy_dist, "ED1 must be an energy distribution class"); + static_assert(ED2::is_energy_dist, "ED2 must be an energy distribution class"); + static_assert(SD::is_spatial_dist, "SD must be a spatial distribution class"); const real_t ppc0; const spidx_t spidx1, spidx2; @@ -556,7 +557,8 @@ namespace kernel { npart_t offset1, offset2; M metric; - const ED energy_dist; + const ED1 energy_dist_1; + const ED2 energy_dist_2; const SD spatial_dist; const real_t inv_V0; random_number_pool_t random_pool; @@ -569,7 +571,8 @@ namespace kernel { npart_t offset1, 
npart_t offset2, const M& metric, - const ED& energy_dist, + const ED1& energy_dist_1, + const ED2& energy_dist_2, const SD& spatial_dist, real_t inv_V0, random_number_pool_t& random_pool) @@ -603,7 +606,8 @@ namespace kernel { , offset1 { offset1 } , offset2 { offset2 } , metric { metric } - , energy_dist { energy_dist } + , energy_dist_1 { energy_dist_1 } + , energy_dist_2 { energy_dist_2 } , spatial_dist { spatial_dist } , inv_V0 { inv_V0 } , random_pool { random_pool } {} @@ -635,12 +639,12 @@ namespace kernel { dx1s_2(index + offset2) = dx1; vec_t v_T { ZERO }, v_XYZ { ZERO }; - energy_dist(x_Ph, v_T, spidx1); + energy_dist_1(x_Ph, v_T, spidx1); metric.template transform_xyz(x_Cd, v_T, v_XYZ); ux1s_1(index + offset1) = v_XYZ[0]; ux2s_1(index + offset1) = v_XYZ[1]; ux3s_1(index + offset1) = v_XYZ[2]; - energy_dist(x_Ph, v_T, spidx2); + energy_dist_2(x_Ph, v_T, spidx2); metric.template transform_xyz(x_Cd, v_T, v_XYZ); ux1s_2(index + offset2) = v_XYZ[0]; ux2s_2(index + offset2) = v_XYZ[1]; @@ -697,7 +701,7 @@ namespace kernel { dx2s_2(index + offset2) = dx2; vec_t v_T { ZERO }, v_Cd { ZERO }; - energy_dist(x_Ph, v_T, spidx1); + energy_dist_1(x_Ph, v_T, spidx1); if constexpr (S == SimEngine::SRPIC) { metric.template transform_xyz(x_Cd_, v_T, v_Cd); } else if constexpr (S == SimEngine::GRPIC) { @@ -706,7 +710,7 @@ namespace kernel { ux1s_1(index + offset1) = v_Cd[0]; ux2s_1(index + offset1) = v_Cd[1]; ux3s_1(index + offset1) = v_Cd[2]; - energy_dist(x_Ph, v_T, spidx2); + energy_dist_2(x_Ph, v_T, spidx2); if constexpr (S == SimEngine::SRPIC) { metric.template transform_xyz(x_Cd_, v_T, v_Cd); } else if constexpr (S == SimEngine::GRPIC) { @@ -770,7 +774,7 @@ namespace kernel { dx3s_2(index + offset2) = dx3; vec_t v_T { ZERO }, v_Cd { ZERO }; - energy_dist(x_Ph, v_T, spidx1); + energy_dist_1(x_Ph, v_T, spidx1); if constexpr (S == SimEngine::SRPIC) { metric.template transform_xyz(x_Cd, v_T, v_Cd); } else if constexpr (S == SimEngine::GRPIC) { @@ -779,7 +783,7 @@ 
namespace kernel { ux1s_1(index + offset1) = v_Cd[0]; ux2s_1(index + offset1) = v_Cd[1]; ux3s_1(index + offset1) = v_Cd[2]; - energy_dist(x_Ph, v_T, spidx2); + energy_dist_2(x_Ph, v_T, spidx2); if constexpr (S == SimEngine::SRPIC) { metric.template transform_xyz(x_Cd, v_T, v_Cd); } else if constexpr (S == SimEngine::GRPIC) { From ece6aaec4aea7a3d91bc199e7b90aa55058ea033 Mon Sep 17 00:00:00 2001 From: haykh Date: Mon, 10 Nov 2025 18:30:03 -0500 Subject: [PATCH 110/154] minor --- pgens/shock/pgen.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pgens/shock/pgen.hpp b/pgens/shock/pgen.hpp index d3082bad9..fc579777d 100644 --- a/pgens/shock/pgen.hpp +++ b/pgens/shock/pgen.hpp @@ -8,9 +8,7 @@ #include "utils/error.h" #include "utils/numeric.h" -#include "archetypes/energy_dist.h" #include "archetypes/field_setter.h" -#include "archetypes/particle_injector.h" #include "archetypes/problem_generator.h" #include "archetypes/utils.h" #include "framework/domain/metadomain.h" @@ -179,7 +177,7 @@ namespace user { const auto drifts = std::make_pair( std::vector { -drift_ux, ZERO, ZERO }, std::vector { -drift_ux, ZERO, ZERO }); - + // inject particles arch::InjectUniformMaxwellians(params, domain, From 6ce24ecad3b988b07e1267b9e96a84e2327218ae Mon Sep 17 00:00:00 2001 From: haykh Date: Mon, 10 Nov 2025 18:32:44 -0500 Subject: [PATCH 111/154] extra files written to same directory --- src/framework/domain/stats.cpp | 5 ++-- src/framework/simulation.cpp | 1 + src/global/utils/diag.cpp | 4 +-- src/global/utils/log.h | 24 ++++++++++------ src/global/utils/plog.h | 50 +++++++++++++++++++++++++++------- 5 files changed, 61 insertions(+), 23 deletions(-) diff --git a/src/framework/domain/stats.cpp b/src/framework/domain/stats.cpp index 49beadf70..009953b5f 100644 --- a/src/framework/domain/stats.cpp +++ b/src/framework/domain/stats.cpp @@ -39,8 +39,9 @@ namespace ntt { raise::ErrorIf(local_domain->is_placeholder(), "local_domain is a placeholder", HERE); - const 
auto filename = params.template get("simulation.name") + - "_stats.csv"; + const auto simname = params.template get("simulation.name"); + const auto filename = std::filesystem::path(simname) / + (simname + "_stats.csv"); const auto enable_stats = params.template get("output.stats.enable"); if (enable_stats and (not is_resuming)) { CallOnce( diff --git a/src/framework/simulation.cpp b/src/framework/simulation.cpp index 6735eda79..20dc39486 100644 --- a/src/framework/simulation.cpp +++ b/src/framework/simulation.cpp @@ -35,6 +35,7 @@ namespace ntt { "log_level", defaults::diag::log_level); logger::initPlog(sim_name, + sim_name, log_level); m_requested_engine = SimEngine::pick( diff --git a/src/global/utils/diag.cpp b/src/global/utils/diag.cpp index f6f615587..ba6c8e4b2 100644 --- a/src/global/utils/diag.cpp +++ b/src/global/utils/diag.cpp @@ -4,6 +4,7 @@ #include "utils/colors.h" #include "utils/formatting.h" +#include "utils/log.h" #include "utils/progressbar.h" #include "utils/timer.h" @@ -241,7 +242,6 @@ namespace diag { ss << std::setw(80) << std::setfill('.') << "" << std::endl << std::endl; }); - std::cout << ((diag_flags & Diag::Colorful) ? 
ss.str() - : color::strip(ss.str())); + info::Print(ss.str(), diag_flags & Diag::Colorful, true, true, false); } } // namespace diag diff --git a/src/global/utils/log.h b/src/global/utils/log.h index 2434414a4..199e340b4 100644 --- a/src/global/utils/log.h +++ b/src/global/utils/log.h @@ -122,31 +122,37 @@ namespace info { inline void Print(const std::string& msg, bool colored = true, bool stdout = true, - bool once = true) { + bool once = true, + bool info = true) { auto msg_nocol = color::strip(msg); if (once) { CallOnce( - [](auto& msg, auto& msg_nocol, auto& stdout, auto& colored) { - PLOGN_(InfoFile) << msg_nocol << std::flush; + [](auto& msg, auto& msg_nocol, auto& stdout, auto& colored, auto& info) { + if (info) { + PLOGN_(InfoFile) << msg_nocol << std::flush; + } if (stdout) { if (colored) { - std::cout << msg << std::endl; + PLOG(plog::none) << msg << std::endl; } else { - std::cout << msg_nocol << std::endl; + PLOG(plog::none) << msg_nocol << std::endl; } } }, msg, msg_nocol, stdout, - colored); + colored, + info); } else { - PLOGN_(InfoFile) << msg_nocol << std::flush; + if (info) { + PLOGN_(InfoFile) << msg_nocol << std::flush; + } if (stdout) { if (colored) { - std::cout << msg << std::endl; + PLOG(plog::none) << msg << std::endl; } else { - std::cout << msg_nocol << std::endl; + PLOG(plog::none) << msg_nocol << std::endl; } } } diff --git a/src/global/utils/plog.h b/src/global/utils/plog.h index 7713a3728..62f6763d5 100644 --- a/src/global/utils/plog.h +++ b/src/global/utils/plog.h @@ -13,6 +13,7 @@ #ifndef GLOBAL_UTILS_PLOG_H #define GLOBAL_UTILS_PLOG_H +#include "utils/colors.h" #include "utils/formatting.h" #include @@ -35,13 +36,32 @@ namespace plog { static auto format(const Record& record) -> util::nstring { util::nostringstream ss; - ss << std::setw(6) << std::left << severityToString(record.getSeverity()) - << PLOG_NSTR(": "); + if (record.getSeverity() != plog::none) { + ss << std::setw(6) << std::left + << 
severityToString(record.getSeverity()) << PLOG_NSTR(": "); + } ss << record.getMessage() << PLOG_NSTR("\n"); return ss.str(); } }; + class NttStdoutFormatter { + public: + static auto header() -> util::nstring { + return util::nstring(); + } + + static auto format(const Record& record) -> util::nstring { + util::nostringstream ss; + if (record.getSeverity() != plog::none) { + ss << std::setw(6) << std::left + << severityToString(record.getSeverity()) << PLOG_NSTR(": "); + } + ss << color::strip(record.getMessage()) << PLOG_NSTR("\n"); + return ss.str(); + } + }; + class NttInfoFormatter { public: static auto header() -> util::nstring { @@ -59,26 +79,36 @@ namespace plog { namespace logger { template - inline void initPlog(const std::string& fname, const std::string& log_level) { + inline void initPlog(const std::string& fpath, + const std::string& fname, + const std::string& log_level) { // setup logging + const auto outfile_name = fname + ".out"; const auto logfile_name = fname + ".log"; const auto infofile_name = fname + ".info"; const auto errfile_name = fname + ".err"; namespace fs = std::filesystem; - fs::path logfile_path { logfile_name }; - fs::path infofile_path { infofile_name }; - fs::path errfile_path { errfile_name }; + if (not fpath.empty() and not fs::exists(fpath)) { + fs::create_directory(fpath); + } + fs::path outfile_path { fs::path { fpath } / outfile_name }; + fs::path logfile_path { fs::path { fpath } / logfile_name }; + fs::path infofile_path { fs::path { fpath } / infofile_name }; + fs::path errfile_path { fs::path { fpath } / errfile_name }; + fs::remove(outfile_path); fs::remove(logfile_path); fs::remove(infofile_path); fs::remove(errfile_path); + static plog::RollingFileAppender outfileAppender( + outfile_path.c_str()); static plog::RollingFileAppender logfileAppender( - logfile_name.c_str()); + logfile_path.c_str()); static plog::RollingFileAppender infofileAppender( - infofile_name.c_str()); + infofile_path.c_str()); static 
plog::RollingFileAppender errfileAppender( - errfile_name.c_str()); + errfile_path.c_str()); auto log_severity = plog::verbose; if (fmt::toLower(log_level) == "warning") { log_severity = plog::warning; @@ -96,7 +126,7 @@ namespace logger { #endif static plog::ColorConsoleAppender consoleAppender; - plog::init(severity, &consoleAppender); + plog::init(severity, &consoleAppender).addAppender(&outfileAppender); } } // namespace logger From f8181b0046493b02aedef7b8fbefe7fb7faeab4a Mon Sep 17 00:00:00 2001 From: haykh Date: Mon, 10 Nov 2025 18:32:58 -0500 Subject: [PATCH 112/154] nonuniform injector with tracking --- src/archetypes/energy_dist.h | 12 +- src/archetypes/particle_injector.h | 17 +- src/global/arch/kokkos_aliases.h | 6 +- src/kernels/injectors.hpp | 361 ++++++++++++++++------------- 4 files changed, 207 insertions(+), 189 deletions(-) diff --git a/src/archetypes/energy_dist.h b/src/archetypes/energy_dist.h index f8ef1175a..578a29684 100644 --- a/src/archetypes/energy_dist.h +++ b/src/archetypes/energy_dist.h @@ -48,9 +48,7 @@ namespace arch { struct Cold : public EnergyDistribution { Cold(const M& metric) : EnergyDistribution { metric } {} - Inline void operator()(const coord_t&, - vec_t& v, - spidx_t = 0) const { + Inline void operator()(const coord_t&, vec_t& v) const { v[0] = ZERO; v[1] = ZERO; @@ -73,9 +71,7 @@ namespace arch { , pl_ind { pl_ind } , pool { pool } {} - Inline void operator()(const coord_t&, - vec_t& v, - spidx_t = 0) const { + Inline void operator()(const coord_t&, vec_t& v) const { auto rand_gen = pool.get_state(); auto rand_X1 = Random(rand_gen); auto rand_gam = ONE; @@ -254,9 +250,7 @@ namespace arch { } } - Inline void operator()(const coord_t& x_Code, - vec_t& v, - spidx_t = 0) const { + Inline void operator()(const coord_t& x_Code, vec_t& v) const { if (cmp::AlmostZero(temperature)) { v[0] = ZERO; v[1] = ZERO; diff --git a/src/archetypes/particle_injector.h b/src/archetypes/particle_injector.h index 634c11d1a..79743ce65 100644 --- 
a/src/archetypes/particle_injector.h +++ b/src/archetypes/particle_injector.h @@ -244,14 +244,10 @@ namespace arch { Kokkos::parallel_for("InjectUniform", nparticles, kernel::UniformInjector_kernel( - species.first, - species.second, domain.species[species.first - 1], domain.species[species.second - 1], nparticles, domain.index(), - domain.species[species.first - 1].npart(), - domain.species[species.second - 1].npart(), domain.mesh.metric, xi_min, xi_max, @@ -368,12 +364,9 @@ namespace arch { params.template get("particles.ppc0") * HALF; auto injector_kernel = kernel::NonUniformInjector_kernel( ppc, - species.first, - species.second, domain.species[species.first - 1], domain.species[species.second - 1], - domain.species[species.first - 1].npart(), - domain.species[species.second - 1].npart(), + domain.index(), domain.mesh.metric, energy_dists.first, energy_dists.second, @@ -384,10 +377,10 @@ namespace arch { cell_range, injector_kernel); const auto n_inj = injector_kernel.number_injected(); - domain.species[species.first - 1].set_npart( - domain.species[species.first - 1].npart() + n_inj); - domain.species[species.second - 1].set_npart( - domain.species[species.second - 1].npart() + n_inj); + for (auto sp : { species.first, species.second }) { + domain.species[sp - 1].set_npart(domain.species[sp - 1].npart() + n_inj); + domain.species[sp - 1].set_counter(domain.species[sp - 1].counter() + n_inj); + } } } diff --git a/src/global/arch/kokkos_aliases.h b/src/global/arch/kokkos_aliases.h index 712fc6eff..e86da1380 100644 --- a/src/global/arch/kokkos_aliases.h +++ b/src/global/arch/kokkos_aliases.h @@ -234,8 +234,8 @@ auto CreateParticleRangePolicy(npart_t, npart_t) -> range_t; * @returns Kokkos::RangePolicy or Kokkos::MDRangePolicy in the accelerator execution space. 
*/ template -auto CreateRangePolicy(const tuple_t&, const tuple_t&) - -> range_t; +auto CreateRangePolicy(const tuple_t&, + const tuple_t&) -> range_t; /** * @brief Function template for generating ND Kokkos range policy on the host. @@ -249,7 +249,7 @@ auto CreateRangePolicyOnHost(const tuple_t&, const tuple_t&) -> range_h_t; // Random number pool/generator type alias -using random_number_pool_t = Kokkos::Random_XorShift1024_Pool; +using random_number_pool_t = Kokkos::Random_XorShift64_Pool; using random_generator_t = typename random_number_pool_t::generator_type; // Random number generator functions diff --git a/src/kernels/injectors.hpp b/src/kernels/injectors.hpp index 5169b3032..a598e230a 100644 --- a/src/kernels/injectors.hpp +++ b/src/kernels/injectors.hpp @@ -81,8 +81,6 @@ namespace kernel { static_assert(ED2::is_energy_dist, "ED2 must be an energy distribution class"); static_assert(M::is_metric, "M must be a metric class"); - const spidx_t spidx1, spidx2; - array_t i1s_1, i2s_1, i3s_1; array_t dx1s_1, dx2s_1, dx3s_1; array_t ux1s_1, ux2s_1, ux3s_1; @@ -99,9 +97,9 @@ namespace kernel { array_t tags_2; array_t pldis_2; - npart_t offset1, offset2; - npart_t domain_idx, cntr1, cntr2; - bool use_tracking_1, use_tracking_2; + const npart_t offset1, offset2; + const npart_t domain_idx, cntr1, cntr2; + const bool use_tracking_1, use_tracking_2; const M metric; const array_t xi_min, xi_max; const ED1 energy_dist_1; @@ -109,14 +107,10 @@ namespace kernel { const real_t inv_V0; random_number_pool_t random_pool; - UniformInjector_kernel(spidx_t spidx1, - spidx_t spidx2, - Particles& species1, + UniformInjector_kernel(Particles& species1, Particles& species2, npart_t inject_npart, npart_t domain_idx, - npart_t offset1, - npart_t offset2, const M& metric, const array_t& xi_min, const array_t& xi_max, @@ -124,9 +118,7 @@ namespace kernel { const ED2& energy_dist_2, real_t inv_V0, random_number_pool_t& random_pool) - : spidx1 { spidx1 } - , spidx2 { spidx2 } - , i1s_1 { 
species1.i1 } + : i1s_1 { species1.i1 } , i2s_1 { species1.i2 } , i3s_1 { species1.i3 } , dx1s_1 { species1.dx1 } @@ -152,13 +144,13 @@ namespace kernel { , weights_2 { species2.weight } , tags_2 { species2.tag } , pldis_2 { species2.pld_i } - , offset1 { offset1 } - , offset2 { offset2 } - , use_tracking_1 { species1.use_tracking() } - , use_tracking_2 { species2.use_tracking() } + , offset1 { species1.npart() } + , offset2 { species2.npart() } , domain_idx { domain_idx } , cntr1 { species1.counter() } , cntr2 { species2.counter() } + , use_tracking_1 { species1.use_tracking() } + , use_tracking_2 { species2.use_tracking() } , metric { metric } , xi_min { xi_min } , xi_max { xi_max } @@ -167,7 +159,6 @@ namespace kernel { , inv_V0 { inv_V0 } , random_pool { random_pool } { if (use_tracking_1) { - printf("using tracking for species #1\n"); species1.set_counter(cntr1 + inject_npart); #if !defined(MPI_ENABLED) raise::ErrorIf(species1.pld_i.extent(1) < 1, @@ -184,7 +175,6 @@ namespace kernel { #endif } if (use_tracking_2) { - printf("using tracking for species #2\n"); species2.set_counter(cntr2 + inject_npart); #if !defined(MPI_ENABLED) raise::ErrorIf(species2.pld_i.extent(1) < 1, @@ -232,34 +222,34 @@ namespace kernel { coord_t x_Ph { ZERO }; metric.template convert(x_Cd, x_Ph); if constexpr (M::CoordType == Coord::Cart) { - energy_dist_1(x_Ph, v1, spidx1); - energy_dist_2(x_Ph, v2, spidx2); + energy_dist_1(x_Ph, v1); + energy_dist_2(x_Ph, v2); } else if constexpr (S == SimEngine::SRPIC) { coord_t x_Cd_ { ZERO }; x_Cd_[0] = x_Cd[0]; x_Cd_[1] = x_Cd[1]; x_Cd_[2] = ZERO; // phi = 0 vec_t v_Ph { ZERO }; - energy_dist_1(x_Ph, v_Ph, spidx1); + energy_dist_1(x_Ph, v_Ph); metric.template transform_xyz(x_Cd_, v_Ph, v1); - energy_dist_2(x_Ph, v_Ph, spidx2); + energy_dist_2(x_Ph, v_Ph); metric.template transform_xyz(x_Cd_, v_Ph, v2); } else if constexpr (S == SimEngine::GRPIC) { vec_t v_Ph { ZERO }; - energy_dist_1(x_Ph, v_Ph, spidx1); + energy_dist_1(x_Ph, v_Ph); 
metric.template transform(x_Cd, v_Ph, v1); - energy_dist_2(x_Ph, v_Ph, spidx2); + energy_dist_2(x_Ph, v_Ph); metric.template transform(x_Cd, v_Ph, v2); } else { raise::KernelError(HERE, "Unknown simulation engine"); } } - // clang-format off real_t weight = ONE; if constexpr (M::CoordType != Coord::Cart) { - const auto sqrt_det_h = metric.sqrt_det_h(x_Cd); - weight = sqrt_det_h * inv_V0; + const auto sqrt_det_h = metric.sqrt_det_h(x_Cd); + weight = sqrt_det_h * inv_V0; } + // clang-format off if (not use_tracking_1) { InjectParticle( p + offset1, @@ -275,7 +265,8 @@ namespace kernel { dx1s_1, dx2s_1, dx3s_1, ux1s_1, ux2s_1, ux3s_1, phis_1, weights_1, tags_1, pldis_1, - xi_Cd, dxi_Cd, v1, weight, ZERO, domain_idx, cntr1 + p); + xi_Cd, dxi_Cd, v1, weight, ZERO, + domain_idx, cntr1 + p); } if (not use_tracking_2) { InjectParticle( @@ -292,7 +283,8 @@ namespace kernel { dx1s_2, dx2s_2, dx3s_2, ux1s_2, ux2s_2, ux3s_2, phis_2, weights_2, tags_2, pldis_2, - xi_Cd, dxi_Cd, v2, weight, ZERO, domain_idx, cntr2 + p); + xi_Cd, dxi_Cd, v2, weight, ZERO, + domain_idx, cntr2 + p); } // clang-format on } @@ -536,8 +528,7 @@ namespace kernel { static_assert(ED2::is_energy_dist, "ED2 must be an energy distribution class"); static_assert(SD::is_spatial_dist, "SD must be a spatial distribution class"); - const real_t ppc0; - const spidx_t spidx1, spidx2; + const real_t ppc0; array_t i1s_1, i2s_1, i3s_1; array_t dx1s_1, dx2s_1, dx3s_1; @@ -545,6 +536,7 @@ namespace kernel { array_t phis_1; array_t weights_1; array_t tags_1; + array_t pldis_1; array_t i1s_2, i2s_2, i3s_2; array_t dx1s_2, dx2s_2, dx3s_2; @@ -552,11 +544,14 @@ namespace kernel { array_t phis_2; array_t weights_2; array_t tags_2; + array_t pldis_2; array_t idx { "idx" }; - npart_t offset1, offset2; - M metric; + const npart_t offset1, offset2; + const npart_t domain_idx, cntr1, cntr2; + const bool use_tracking_1, use_tracking_2; + const M metric; const ED1 energy_dist_1; const ED2 energy_dist_2; const SD spatial_dist; @@ 
-564,12 +559,9 @@ namespace kernel { random_number_pool_t random_pool; NonUniformInjector_kernel(real_t ppc0, - spidx_t spidx1, - spidx_t spidx2, Particles& species1, Particles& species2, - npart_t offset1, - npart_t offset2, + npart_t domain_idx, const M& metric, const ED1& energy_dist_1, const ED2& energy_dist_2, @@ -577,8 +569,6 @@ namespace kernel { real_t inv_V0, random_number_pool_t& random_pool) : ppc0 { ppc0 } - , spidx1 { spidx1 } - , spidx2 { spidx2 } , i1s_1 { species1.i1 } , i2s_1 { species1.i2 } , i3s_1 { species1.i3 } @@ -591,6 +581,7 @@ namespace kernel { , phis_1 { species1.phi } , weights_1 { species1.weight } , tags_1 { species1.tag } + , pldis_1 { species1.pld_i } , i1s_2 { species2.i1 } , i2s_2 { species2.i2 } , i3s_2 { species2.i3 } @@ -603,8 +594,14 @@ namespace kernel { , phis_2 { species2.phi } , weights_2 { species2.weight } , tags_2 { species2.tag } - , offset1 { offset1 } - , offset2 { offset2 } + , pldis_2 { species2.pld_i } + , offset1 { species1.npart() } + , offset2 { species2.npart() } + , domain_idx { domain_idx } + , cntr1 { species1.counter() } + , cntr2 { species2.counter() } + , use_tracking_1 { species1.use_tracking() } + , use_tracking_2 { species2.use_tracking() } , metric { metric } , energy_dist_1 { energy_dist_1 } , energy_dist_2 { energy_dist_2 } @@ -618,50 +615,94 @@ namespace kernel { return idx_h(); } + Inline void inject1(const index_t& index, + const tuple_t& xi_Cd, + const tuple_t& dxi_Cd, + const vec_t& v_Cd, + const real_t& weight) const { + // clang-format off + if (not use_tracking_1) { + InjectParticle(index + offset1, + i1s_1, i2s_1, i3s_1, + dx1s_1, dx2s_1, dx3s_1, + ux1s_1, ux2s_1, ux3s_1, + phis_1, weights_1, tags_1, pldis_1, + xi_Cd, dxi_Cd, v_Cd, weight, ZERO); + } else { + InjectParticle(index + offset1, + i1s_1, i2s_1, i3s_1, + dx1s_1, dx2s_1, dx3s_1, + ux1s_1, ux2s_1, ux3s_1, + phis_1, weights_1, tags_1, pldis_1, + xi_Cd, dxi_Cd, v_Cd, weight, ZERO, + domain_idx, index + cntr1); + } + // clang-format
on + } + + /* Species-2 counterpart of inject1: writes into the species-2 arrays at slot (index + offset2); when species 2 is tracked, additionally passes (domain_idx, index + cntr2) to InjectParticle. Must use offset2/cntr2, not the species-1 values. */ Inline void inject2(const index_t& index, + const tuple_t& xi_Cd, + const tuple_t& dxi_Cd, + const vec_t& v_Cd, + const real_t& weight) const { + // clang-format off + if (not use_tracking_2) { + InjectParticle(index + offset2, + i1s_2, i2s_2, i3s_2, + dx1s_2, dx2s_2, dx3s_2, + ux1s_2, ux2s_2, ux3s_2, + phis_2, weights_2, tags_2, pldis_2, + xi_Cd, dxi_Cd, v_Cd, weight, ZERO); + } else { + InjectParticle(index + offset2, + i1s_2, i2s_2, i3s_2, + dx1s_2, dx2s_2, dx3s_2, + ux1s_2, ux2s_2, ux3s_2, + phis_2, weights_2, tags_2, pldis_2, + xi_Cd, dxi_Cd, v_Cd, weight, ZERO, + domain_idx, index + cntr2); + } + // clang-format on + } + Inline void operator()(index_t i1) const { if constexpr (M::Dim == Dim::_1D) { const auto i1_ = COORD(i1); coord_t x_Cd { i1_ + HALF }; coord_t x_Ph { ZERO }; metric.template convert(x_Cd, x_Ph); + const auto ppc = static_cast(ppc0 * spatial_dist(x_Ph)); if (ppc == 0) { return; } - auto rand_gen = random_pool.get_state(); + + auto weight = ONE; + if constexpr (M::CoordType != Coord::Cart) { + weight = metric.sqrt_det_h({ i1_ + HALF }) * inv_V0; + } for (auto p { 0u }; p < ppc; ++p) { const auto index = Kokkos::atomic_fetch_add(&idx(), 1); - const auto dx1 = Random(rand_gen); - - i1s_1(index + offset1) = static_cast(i1) - N_GHOSTS; - dx1s_1(index + offset1) = dx1; - i1s_2(index + offset2) = static_cast(i1) - N_GHOSTS; - dx1s_2(index + offset2) = dx1; - - vec_t v_T { ZERO }, v_XYZ { ZERO }; - energy_dist_1(x_Ph, v_T, spidx1); - metric.template transform_xyz(x_Cd, v_T, v_XYZ); - ux1s_1(index + offset1) = v_XYZ[0]; - ux2s_1(index + offset1) = v_XYZ[1]; - ux3s_1(index + offset1) = v_XYZ[2]; - energy_dist_2(x_Ph, v_T, spidx2); - metric.template transform_xyz(x_Cd, v_T, v_XYZ); - ux1s_2(index + offset2) = v_XYZ[0]; - ux2s_2(index + offset2) = v_XYZ[1]; - ux3s_2(index + offset2) = v_XYZ[2]; - - tags_1(index + offset1) = ParticleTag::alive; - tags_2(index + offset2) = ParticleTag::alive; - if (M::CoordType == Coord::Cart) { -
weights_1(index + offset1) = ONE; - weights_2(index + offset2) = ONE; - } else { - const auto wei = metric.sqrt_det_h({ i1_ + HALF }) * inv_V0; - weights_1(index + offset1) = wei; - weights_2(index + offset2) = wei; + + auto rand_gen = random_pool.get_state(); + const auto dx1 = Random(rand_gen); + random_pool.free_state(rand_gen); + + vec_t v_XYZ { ZERO }; + { + vec_t v_T { ZERO }; + energy_dist_1(x_Ph, v_T); + metric.template transform_xyz(x_Cd, v_T, v_XYZ); } + inject1(index, { static_cast(i1_) }, { dx1 }, v_XYZ, weight); + + { + vec_t v_T { ZERO }; + energy_dist_2(x_Ph, v_T); + metric.template transform_xyz(x_Cd, v_T, v_XYZ); + } + inject2(index, { static_cast(i1_) }, { dx1 }, v_XYZ, weight); } - random_pool.free_state(rand_gen); } else { raise::KernelError(HERE, "NonUniformInjector_kernel 1D called for 2D/3D"); } @@ -680,58 +721,55 @@ namespace kernel { x_Cd_[2] = ZERO; } metric.template convert(x_Cd, x_Ph); + const auto ppc = static_cast(ppc0 * spatial_dist(x_Ph)); if (ppc == 0) { return; } - auto rand_gen = random_pool.get_state(); + + auto weight = ONE; + if constexpr (M::CoordType != Coord::Cart) { + weight = metric.sqrt_det_h({ i1_ + HALF, i2_ + HALF }) * inv_V0; + } for (auto p { 0u }; p < ppc; ++p) { const auto index = Kokkos::atomic_fetch_add(&idx(), 1); - const auto dx1 = Random(rand_gen); - const auto dx2 = Random(rand_gen); - - i1s_1(index + offset1) = static_cast(i1) - N_GHOSTS; - dx1s_1(index + offset1) = dx1; - i1s_2(index + offset2) = static_cast(i1) - N_GHOSTS; - dx1s_2(index + offset2) = dx1; - i2s_1(index + offset1) = static_cast(i2) - N_GHOSTS; - dx2s_1(index + offset1) = dx2; - i2s_2(index + offset2) = static_cast(i2) - N_GHOSTS; - dx2s_2(index + offset2) = dx2; - - vec_t v_T { ZERO }, v_Cd { ZERO }; - energy_dist_1(x_Ph, v_T, spidx1); - if constexpr (S == SimEngine::SRPIC) { - metric.template transform_xyz(x_Cd_, v_T, v_Cd); - } else if constexpr (S == SimEngine::GRPIC) { - metric.template transform(x_Cd_, v_T, v_Cd); + auto rand_gen = 
random_pool.get_state(); + const auto dx1 = Random(rand_gen); + const auto dx2 = Random(rand_gen); + random_pool.free_state(rand_gen); + + vec_t v_Cd { ZERO }; + { + vec_t v_T { ZERO }; + energy_dist_1(x_Ph, v_T); + if constexpr (S == SimEngine::SRPIC) { + metric.template transform_xyz(x_Cd_, v_T, v_Cd); + } else if constexpr (S == SimEngine::GRPIC) { + metric.template transform(x_Cd_, v_T, v_Cd); + } } - ux1s_1(index + offset1) = v_Cd[0]; - ux2s_1(index + offset1) = v_Cd[1]; - ux3s_1(index + offset1) = v_Cd[2]; - energy_dist_2(x_Ph, v_T, spidx2); - if constexpr (S == SimEngine::SRPIC) { - metric.template transform_xyz(x_Cd_, v_T, v_Cd); - } else if constexpr (S == SimEngine::GRPIC) { - metric.template transform(x_Cd_, v_T, v_Cd); - } - ux1s_2(index + offset2) = v_Cd[0]; - ux2s_2(index + offset2) = v_Cd[1]; - ux3s_2(index + offset2) = v_Cd[2]; - - tags_1(index + offset1) = ParticleTag::alive; - tags_2(index + offset2) = ParticleTag::alive; - if (M::CoordType == Coord::Cart) { - weights_1(index + offset1) = ONE; - weights_2(index + offset2) = ONE; - } else { - const auto wei = metric.sqrt_det_h({ i1_ + HALF, i2_ + HALF }) * inv_V0; - weights_1(index + offset1) = wei; - weights_2(index + offset2) = wei; + inject1(index, + { static_cast(i1_), static_cast(i2_) }, + { dx1, dx2 }, + v_Cd, + weight); + + { + vec_t v_T { ZERO }; + energy_dist_2(x_Ph, v_T); + if constexpr (S == SimEngine::SRPIC) { + metric.template transform_xyz(x_Cd_, v_T, v_Cd); + } else if constexpr (S == SimEngine::GRPIC) { + metric.template transform(x_Cd_, v_T, v_Cd); + } } + inject2(index, + { static_cast(i1_), static_cast(i2_) }, + { dx1, dx2 }, + v_Cd, + weight); } - random_pool.free_state(rand_gen); } else { @@ -747,66 +785,59 @@ namespace kernel { coord_t x_Cd { i1_ + HALF, i2_ + HALF, i3_ + HALF }; coord_t x_Ph { ZERO }; metric.template convert(x_Cd, x_Ph); + const auto ppc = static_cast(ppc0 * spatial_dist(x_Ph)); if (ppc == 0) { return; } - auto rand_gen = random_pool.get_state(); + + auto 
weight = ONE; + if constexpr (M::CoordType != Coord::Cart) { + weight = metric.sqrt_det_h({ i1_ + HALF, i2_ + HALF, i3_ + HALF }) * + inv_V0; + } for (auto p { 0u }; p < ppc; ++p) { const auto index = Kokkos::atomic_fetch_add(&idx(), 1); - const auto dx1 = Random(rand_gen); - const auto dx2 = Random(rand_gen); - const auto dx3 = Random(rand_gen); - - i1s_1(index + offset1) = static_cast(i1) - N_GHOSTS; - dx1s_1(index + offset1) = dx1; - i1s_2(index + offset2) = static_cast(i1) - N_GHOSTS; - dx1s_2(index + offset2) = dx1; - - i2s_1(index + offset1) = static_cast(i2) - N_GHOSTS; - dx2s_1(index + offset1) = dx2; - i2s_2(index + offset2) = static_cast(i2) - N_GHOSTS; - dx2s_2(index + offset2) = dx2; - - i3s_1(index + offset1) = static_cast(i3) - N_GHOSTS; - dx3s_1(index + offset1) = dx3; - i3s_2(index + offset2) = static_cast(i3) - N_GHOSTS; - dx3s_2(index + offset2) = dx3; - - vec_t v_T { ZERO }, v_Cd { ZERO }; - energy_dist_1(x_Ph, v_T, spidx1); - if constexpr (S == SimEngine::SRPIC) { - metric.template transform_xyz(x_Cd, v_T, v_Cd); - } else if constexpr (S == SimEngine::GRPIC) { - metric.template transform(x_Cd, v_T, v_Cd); - } - ux1s_1(index + offset1) = v_Cd[0]; - ux2s_1(index + offset1) = v_Cd[1]; - ux3s_1(index + offset1) = v_Cd[2]; - energy_dist_2(x_Ph, v_T, spidx2); - if constexpr (S == SimEngine::SRPIC) { - metric.template transform_xyz(x_Cd, v_T, v_Cd); - } else if constexpr (S == SimEngine::GRPIC) { - metric.template transform(x_Cd, v_T, v_Cd); + + auto rand_gen = random_pool.get_state(); + const auto dx1 = Random(rand_gen); + const auto dx2 = Random(rand_gen); + const auto dx3 = Random(rand_gen); + random_pool.free_state(rand_gen); + + vec_t v_Cd { ZERO }; + { + vec_t v_T { ZERO }; + energy_dist_1(x_Ph, v_T); + if constexpr (S == SimEngine::SRPIC) { + metric.template transform_xyz(x_Cd, v_T, v_Cd); + } else if constexpr (S == SimEngine::GRPIC) { + metric.template transform(x_Cd, v_T, v_Cd); + } } - ux1s_2(index + offset2) = v_Cd[0]; - ux2s_2(index + 
offset2) = v_Cd[1]; - ux3s_2(index + offset2) = v_Cd[2]; - - tags_1(index + offset1) = ParticleTag::alive; - tags_2(index + offset2) = ParticleTag::alive; - if (M::CoordType == Coord::Cart) { - weights_1(index + offset1) = ONE; - weights_2(index + offset2) = ONE; - } else { - const auto wei = metric.sqrt_det_h( - { i1_ + HALF, i2_ + HALF, i3_ + HALF }) * - inv_V0; - weights_1(index + offset1) = wei; - weights_2(index + offset2) = wei; + inject1( + index, + { static_cast(i1_), static_cast(i2_), static_cast(i3_) }, + { dx1, dx2, dx3 }, + v_Cd, + weight); + + { + vec_t v_T { ZERO }; + energy_dist_2(x_Ph, v_T); + if constexpr (S == SimEngine::SRPIC) { + metric.template transform_xyz(x_Cd, v_T, v_Cd); + } else if constexpr (S == SimEngine::GRPIC) { + metric.template transform(x_Cd, v_T, v_Cd); + } } + inject2( + index, + { static_cast(i1_), static_cast(i2_), static_cast(i3_) }, + { dx1, dx2, dx3 }, + v_Cd, + weight); } - random_pool.free_state(rand_gen); } else { raise::KernelError(HERE, "NonUniformInjector_kernel 3D called for 1D/2D"); } From b4d39d0998e9e0771bfd0452e012b35f1fdf1ced Mon Sep 17 00:00:00 2001 From: haykh Date: Tue, 11 Nov 2025 15:30:50 +0000 Subject: [PATCH 113/154] minor bug in prtl var definitions --- src/framework/containers/particles_io.cpp | 4 +++- src/kernels/prtls_to_phys.hpp | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/framework/containers/particles_io.cpp b/src/framework/containers/particles_io.cpp index 332c6e9d9..fdb66540f 100644 --- a/src/framework/containers/particles_io.cpp +++ b/src/framework/containers/particles_io.cpp @@ -31,7 +31,9 @@ namespace ntt { * * * * * * * * */ template void Particles::OutputDeclare(adios2::IO& io) const { - for (auto d { 0u }; d < D; ++d) { + const auto n_addition_coords = ((D == Dim::_2D) and (C != Coord::Cart)) ? 
1 + : 0; + for (auto d { 0u }; d < D + n_addition_coords; ++d) { io.DefineVariable(fmt::format("pX%d_%d", d + 1, index()), { adios2::UnknownDim }, { adios2::UnknownDim }, diff --git a/src/kernels/prtls_to_phys.hpp b/src/kernels/prtls_to_phys.hpp index fbafbe00e..eb0903bd8 100644 --- a/src/kernels/prtls_to_phys.hpp +++ b/src/kernels/prtls_to_phys.hpp @@ -142,8 +142,7 @@ namespace kernel { } if constexpr ((D == Dim::_2D) && (M::CoordType != Coord::Cart)) { buff_x3(p_to) = phi(p_from); - } - if constexpr (D == Dim::_3D) { + } else if constexpr (D == Dim::_3D) { buff_x3(p_to) = metric.template convert<3, Crd::Cd, Crd::Ph>( static_cast(i3(p_from)) + static_cast(dx3(p_from))); } From e5c65432441f0b2273165d6b43476d797643d8ba Mon Sep 17 00:00:00 2001 From: haykh Date: Mon, 24 Nov 2025 17:41:03 -0500 Subject: [PATCH 114/154] pld output fixed --- src/framework/containers/particles_io.cpp | 12 ++++-------- src/global/arch/kokkos_aliases.h | 3 +++ src/kernels/prtls_to_phys.hpp | 20 ++++++++------------ src/output/utils/writers.h | 11 ++++++++--- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/framework/containers/particles_io.cpp b/src/framework/containers/particles_io.cpp index fdb66540f..ea6c12869 100644 --- a/src/framework/containers/particles_io.cpp +++ b/src/framework/containers/particles_io.cpp @@ -59,18 +59,14 @@ namespace ntt { } auto num_track_plds = 0; if (use_tracking()) { -#if !defined(MPI_ENABLED) - num_track_plds = 1; io.DefineVariable(fmt::format("pIDX_%d", index()), { adios2::UnknownDim }, { adios2::UnknownDim }, { adios2::UnknownDim }); +#if !defined(MPI_ENABLED) + num_track_plds = 1; #else num_track_plds = 2; - io.DefineVariable(fmt::format("pIDX_%d", index()), - { adios2::UnknownDim }, - { adios2::UnknownDim }, - { adios2::UnknownDim }); io.DefineVariable(fmt::format("pRNK_%d", index()), { adios2::UnknownDim }, { adios2::UnknownDim }, @@ -109,7 +105,7 @@ namespace ntt { const auto pld_i_d = this->pld_i; Kokkos::parallel_reduce( 
"CountOutputParticles", - npart(), + rangeActiveParticles(), Lambda(index_t p, npart_t & l_nout) { if ((tag_d(p) == ParticleTag::alive) and (pld_i_d(p, pldi::spcCtr) % prtl_stride == 0)) { @@ -121,7 +117,7 @@ namespace ntt { array_t out_counter { "out_counter" }; Kokkos::parallel_for( "RecordOutputIndices", - npart(), + rangeActiveParticles(), Lambda(index_t p) { if ((tag_d(p) == ParticleTag::alive) and (pld_i_d(p, pldi::spcCtr) % prtl_stride == 0)) { diff --git a/src/global/arch/kokkos_aliases.h b/src/global/arch/kokkos_aliases.h index e86da1380..06fff56ac 100644 --- a/src/global/arch/kokkos_aliases.h +++ b/src/global/arch/kokkos_aliases.h @@ -36,6 +36,9 @@ namespace math = Kokkos; template using array_t = Kokkos::View; +template +using array_h_t = Kokkos::View; + // Array mirror alias of arbitrary type template using array_mirror_t = typename array_t::HostMirror; diff --git a/src/kernels/prtls_to_phys.hpp b/src/kernels/prtls_to_phys.hpp index eb0903bd8..8ac9ba38c 100644 --- a/src/kernels/prtls_to_phys.hpp +++ b/src/kernels/prtls_to_phys.hpp @@ -118,7 +118,7 @@ namespace kernel { } Inline void operator()(index_t p) const { - if constexpr (!T) { // no tracking enabled + if constexpr (not T) { // no tracking enabled bufferX(p * stride, p); bufferU(p * stride, p); buff_wei(p) = weight(p * stride); @@ -131,7 +131,7 @@ namespace kernel { } } - Inline void bufferX(index_t& p_from, index_t& p_to) const { + Inline void bufferX(const index_t& p_from, const index_t& p_to) const { if constexpr ((D == Dim::_1D) || (D == Dim::_2D) || (D == Dim::_3D)) { buff_x1(p_to) = metric.template convert<1, Crd::Cd, Crd::Ph>( static_cast(i1(p_from)) + static_cast(dx1(p_from))); @@ -148,7 +148,7 @@ namespace kernel { } } - Inline void bufferU(index_t& p_from, index_t& p_to) const { + Inline void bufferU(const index_t& p_from, const index_t& p_to) const { vec_t u_Phys { ZERO }; if constexpr (D == Dim::_1D) { if constexpr (M::CoordType == Coord::Cart) { @@ -206,16 +206,12 @@ namespace kernel 
{ buff_ux3(p_to) = u_Phys[2]; } - Inline void bufferPlds(index_t& p_from, index_t& p_to) const { - if (buff_pldr.extent(0) > 0) { - for (auto pr { 0u }; pr < buff_pldr.extent(1); ++pr) { - buff_pldr(p_to, pr) = pld_r(p_from, pr); - } + Inline void bufferPlds(const index_t& p_from, const index_t& p_to) const { + for (auto pr { 0u }; pr < buff_pldr.extent(1); ++pr) { + buff_pldr(p_to, pr) = pld_r(p_from, pr); } - if (buff_pldi.extent(0) > 0) { - for (auto pi { 0u }; pi < buff_pldi.extent(1); ++pi) { - buff_pldi(p_to, pi) = pld_i(p_from, pi); - } + for (auto pi { 0u }; pi < buff_pldi.extent(1); ++pi) { + buff_pldi(p_to, pi) = pld_i(p_from, pi); } } }; diff --git a/src/output/utils/writers.h b/src/output/utils/writers.h index 1da24e9ee..5980e2cd6 100644 --- a/src/output/utils/writers.h +++ b/src/output/utils/writers.h @@ -58,14 +58,19 @@ namespace out { std::size_t local_size, std::size_t global_size, std::size_t local_offset) { - const auto slice = range_tuple_t(0, local_size); - auto var = io.InquireVariable(name); + auto var = io.InquireVariable(name); var.SetShape({ global_size }); var.SetSelection(adios2::Box({ local_offset }, { local_size })); auto data_h = Kokkos::create_mirror_view(data); Kokkos::deep_copy(data_h, data); - writer.Put(var, data_h.data(), adios2::Mode::Sync); + if (!data_h.span_is_contiguous()) { + array_h_t data_contig_h { "data_contig_h", local_size }; + Kokkos::deep_copy(data_contig_h, data_h); + writer.Put(var, data_contig_h.data(), adios2::Mode::Sync); + } else { + writer.Put(var, data_h.data(), adios2::Mode::Sync); + } } template From aec8d05217ff3228a62f92935758f23ccecea92f Mon Sep 17 00:00:00 2001 From: haykh Date: Mon, 24 Nov 2025 17:41:21 -0500 Subject: [PATCH 115/154] species printer adjusted --- src/global/utils/diag.cpp | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/global/utils/diag.cpp b/src/global/utils/diag.cpp index ba6c8e4b2..f5f9338db 100644 --- a/src/global/utils/diag.cpp 
+++ b/src/global/utils/diag.cpp @@ -26,11 +26,12 @@ namespace diag { npart_t npart, npart_t maxnpart) -> std::vector> { auto stats = std::vector>(); + const auto percentage = [](npart_t part, npart_t maxpart) -> unsigned short { + return static_cast( + 100.0f * static_cast(part) / static_cast(maxpart)); + }; #if !defined(MPI_ENABLED) - stats.push_back( - { npart, - static_cast( - 100.0f * static_cast(npart) / static_cast(maxnpart)) }); + stats.push_back({ npart, percentage(npart, maxnpart) }); #else int rank, size; MPI_Comm_rank(MPI_COMM_WORLD, &rank); @@ -56,22 +57,18 @@ namespace diag { if (rank != MPI_ROOT_RANK) { return stats; } - auto tot_npart = std::accumulate(mpi_npart.begin(), mpi_npart.end(), 0); - const auto max_idx = std::distance( + const npart_t tot_npart = std::accumulate(mpi_npart.begin(), mpi_npart.end(), 0u); + const npart_t max_idx = std::distance( mpi_npart.begin(), std::max_element(mpi_npart.begin(), mpi_npart.end())); - const auto min_idx = std::distance( + const npart_t min_idx = std::distance( mpi_npart.begin(), std::min_element(mpi_npart.begin(), mpi_npart.end())); stats.push_back({ tot_npart, 0u }); stats.push_back({ mpi_npart[min_idx], - static_cast( - 100.0f * static_cast(mpi_npart[min_idx]) / - static_cast(mpi_maxnpart[min_idx])) }); + percentage(mpi_npart[min_idx], mpi_maxnpart[min_idx]) }); stats.push_back({ mpi_npart[max_idx], - static_cast( - 100.0f * static_cast(mpi_npart[max_idx]) / - static_cast(mpi_maxnpart[max_idx])) }); + percentage(mpi_npart[max_idx], mpi_maxnpart[max_idx]) }); #endif return stats; } @@ -182,7 +179,7 @@ namespace diag { const auto max_pct = part_stats[2].second; ss << fmt::alignedTable( { - fmt::format("species %2lu (%s)", i, species_labels[i].c_str()), + fmt::format("species %2lu (%s)", i + 1, species_labels[i].c_str()), tot_npart > 9999 ? 
fmt::format("%.2Le", (long double)tot_npart) : std::to_string(tot_npart), std::to_string(min_pct) + "%", @@ -208,7 +205,7 @@ namespace diag { const auto tot_pct = part_stats[0].second; ss << fmt::alignedTable( { - fmt::format("species %2lu (%s)", i, species_labels[i].c_str()), + fmt::format("species %2lu (%s)", i + 1, species_labels[i].c_str()), tot_npart > 9999 ? fmt::format("%.2Le", (long double)tot_npart) : std::to_string(tot_npart), std::to_string(tot_pct) + "%", From d0a343cdacf44a4e6a83cc5815a7d1197270a4b9 Mon Sep 17 00:00:00 2001 From: haykh Date: Tue, 25 Nov 2025 15:27:38 -0500 Subject: [PATCH 116/154] unnecessary probe_box in rec --- pgens/reconnection/pgen.hpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pgens/reconnection/pgen.hpp b/pgens/reconnection/pgen.hpp index 8af53c52e..b446c11f2 100644 --- a/pgens/reconnection/pgen.hpp +++ b/pgens/reconnection/pgen.hpp @@ -245,15 +245,10 @@ namespace user { const auto dx = domain.mesh.metric.template sqrt_h_<1, 1>({}); boundaries_t inj_box_up, inj_box_down; - boundaries_t probe_box_up, probe_box_down; inj_box_up.push_back(Range::All); inj_box_down.push_back(Range::All); - probe_box_up.push_back(Range::All); - probe_box_down.push_back(Range::All); inj_box_up.push_back({ ymax - inj_ypad - 10 * dx, ymax - inj_ypad }); inj_box_down.push_back({ ymin + inj_ypad, ymin + inj_ypad + 10 * dx }); - probe_box_up.push_back({ ymax - inj_ypad - 10 * dx, ymax - inj_ypad }); - probe_box_down.push_back({ ymin + inj_ypad, ymin + inj_ypad + 10 * dx }); if constexpr (M::Dim == Dim::_3D) { inj_box_up.push_back(Range::All); From 34ddd52c3f842ba80c57d7e877a392892958383e Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Wed, 26 Nov 2025 15:41:38 +0000 Subject: [PATCH 117/154] add output of deposit scheme and order to info file --- src/engines/engine_printer.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/engines/engine_printer.cpp b/src/engines/engine_printer.cpp index cbfde9304..793ba96eb 100644 --- 
a/src/engines/engine_printer.cpp +++ b/src/engines/engine_printer.cpp @@ -305,6 +305,12 @@ namespace ntt { params.template get("simulation.name").c_str()); add_param(report, 4, "Problem generator", "%s", pgen.c_str()); add_param(report, 4, "Engine", "%s", SimEngine(S).to_string()); + #if SHAPE_ORDER == 0 + add_param(report, 4, "Deposit", "%s", "zigzag"); +#else + add_param(report, 4, "Deposit", "%s", "esirkepov"); + add_param(report, 4, "Interpolation order", "%i", SHAPE_ORDER); +#endif add_param(report, 4, "Metric", "%s", Metric(M::MetricType).to_string()); add_param(report, 4, "Timestep [dt]", "%.3e", dt); add_param(report, 4, "Runtime", "%.3e [%d steps]", runtime, max_steps); From 6a079313aa05e018cfedfd0da1f8c0ae731a0f42 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Sun, 30 Nov 2025 15:23:31 +0000 Subject: [PATCH 118/154] fix order of printing for more conistency --- src/engines/engine_printer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/engines/engine_printer.cpp b/src/engines/engine_printer.cpp index 793ba96eb..91863abf0 100644 --- a/src/engines/engine_printer.cpp +++ b/src/engines/engine_printer.cpp @@ -305,13 +305,13 @@ namespace ntt { params.template get("simulation.name").c_str()); add_param(report, 4, "Problem generator", "%s", pgen.c_str()); add_param(report, 4, "Engine", "%s", SimEngine(S).to_string()); - #if SHAPE_ORDER == 0 + add_param(report, 4, "Metric", "%s", Metric(M::MetricType).to_string()); +#if SHAPE_ORDER == 0 add_param(report, 4, "Deposit", "%s", "zigzag"); #else add_param(report, 4, "Deposit", "%s", "esirkepov"); add_param(report, 4, "Interpolation order", "%i", SHAPE_ORDER); #endif - add_param(report, 4, "Metric", "%s", Metric(M::MetricType).to_string()); add_param(report, 4, "Timestep [dt]", "%.3e", dt); add_param(report, 4, "Runtime", "%.3e [%d steps]", runtime, max_steps); report += "\n"; From 73597bd952c6b65b6d0531d27e5112e915bc7e4f Mon Sep 17 00:00:00 2001 From: haykh Date: Mon, 1 Dec 2025 15:24:12 
-0500 Subject: [PATCH 119/154] rm legacy code, Inline function now pass by values, bug with ZERO --- legacy/Makefile.in | 174 --- legacy/Nttiny.mk | 38 - legacy/Tests.mk | 108 -- legacy/_monopole/monopole.toml | 80 -- legacy/_monopole/pgen.hpp | 96 -- legacy/benchmark.cpp | 273 ---- legacy/benchmarks/CMakeLists.txt | 61 - legacy/benchmarks/gr.cpp | 144 --- legacy/benchmarks/sr-mink.cpp | 176 --- legacy/benchmarks/sr-sph.cpp | 142 --- legacy/cmake/config.cmake | 117 -- legacy/configure.py | 421 ------- legacy/deploy/aux/argparse.sh | 45 - legacy/deploy/aux/aux.sh | 86 -- legacy/deploy/aux/config.sh | 73 -- legacy/deploy/aux/default.sh | 21 - legacy/deploy/aux/globals.sh | 39 - legacy/deploy/aux/run.sh | 172 --- legacy/deploy/compile_adios2.sh | 268 ---- legacy/deploy/compile_hdf5.sh | 166 --- legacy/deploy/compile_kokkos.sh | 215 ---- legacy/deploy/compile_ompi.sh | 161 --- legacy/deploy/compile_ucx.sh | 82 -- legacy/deploy/deploy.py | 282 ----- legacy/deploy/personal.toml | 16 - legacy/deploy/stellar.toml | 16 - legacy/deploy/zaratan.toml | 16 - legacy/src/framework/digital_filters.hpp | 180 --- legacy/src/framework/io/output_csv.cpp | 194 --- legacy/src/framework/io/output_csv.h | 53 - legacy/src/framework/ks_phys_units.h | 110 -- .../framework/metrics/kerr_schild_nomass.h | 335 ----- legacy/src/framework/utils/current_filter.cpp | 111 -- legacy/src/framework/utils/current_filter.hpp | 272 ---- .../framework/utils/particle_injectors.hpp | 1018 --------------- legacy/src/framework/utils/timer.cpp | 120 -- legacy/src/framework/utils/timer.h | 95 -- legacy/src/framework/writer.cpp | 68 - legacy/src/grpic/boundaries/fields_bc.cpp | 82 -- legacy/src/nttiny.cpp | 124 -- legacy/src/particle_pusher_sr.hpp | 966 --------------- legacy/src/pic/boundaries/currents_bc.cpp | 84 -- legacy/src/pic/boundaries/currents_bc.hpp | 65 - legacy/src/pic/fields/ampere_curv.hpp | 133 -- legacy/src/pic/fields/ampere_mink.hpp | 77 -- legacy/src/pic/fields/faraday_curv.hpp | 87 -- 
legacy/src/pic/fields/faraday_mink.hpp | 77 -- legacy/src/pic/particles/particle_pusher.hpp | 1093 ----------------- legacy/src/pic/pgen/old/debug.cpp | 75 -- legacy/src/pic/pgen/old/debug.hpp | 26 - legacy/src/pic/pgen/old/deposit.cpp | 83 -- legacy/src/pic/pgen/old/deposit.hpp | 24 - legacy/src/pic/pgen/old/em.cpp | 109 -- legacy/src/pic/pgen/old/em.hpp | 28 - legacy/src/pic/pgen/old/magnetosphere.hpp | 220 ---- legacy/src/pic/pgen/old/oneprtl.cpp | 74 -- legacy/src/pic/pgen/old/oneprtl.hpp | 24 - legacy/src/pic/pgen/old/oneprtl_sph.cpp | 153 --- legacy/src/pic/pgen/old/oneprtl_sph.hpp | 26 - legacy/tests/TODO_CMakeLists.txt | 263 ---- legacy/tests/deposit.cpp | 258 ---- legacy/tests/kernels-gr.cpp | 211 ---- legacy/tests/kernels-sr.cpp | 225 ---- legacy/tests/pusher-sr-mink.cpp | 194 --- legacy/tests/utils-comm.cpp | 212 ---- legacy/tests/utils-metadomain.cpp | 159 --- legacy/tests/utils-writer.cpp | 163 --- src/archetypes/energy_dist.h | 15 +- src/global/tests/enums.cpp | 2 +- src/global/utils/sorting.h | 6 +- src/kernels/fields_bcs.hpp | 2 +- src/kernels/injectors.hpp | 18 +- src/kernels/particle_pusher_gr.hpp | 20 +- src/kernels/particle_pusher_sr.hpp | 32 +- src/kernels/prtls_to_phys.hpp | 6 +- src/kernels/tests/ext_force.cpp | 14 +- src/kernels/tests/gca_pusher.cpp | 2 +- src/kernels/tests/prtl_bc.cpp | 2 +- src/kernels/tests/pusher.cpp | 4 +- src/metrics/kerr_schild.h | 22 +- src/metrics/kerr_schild_0.h | 10 +- src/metrics/minkowski.h | 4 +- src/metrics/qkerr_schild.h | 30 +- src/metrics/qspherical.h | 12 +- src/metrics/spherical.h | 6 +- 85 files changed, 100 insertions(+), 11466 deletions(-) delete mode 100644 legacy/Makefile.in delete mode 100644 legacy/Nttiny.mk delete mode 100644 legacy/Tests.mk delete mode 100644 legacy/_monopole/monopole.toml delete mode 100644 legacy/_monopole/pgen.hpp delete mode 100644 legacy/benchmark.cpp delete mode 100644 legacy/benchmarks/CMakeLists.txt delete mode 100644 legacy/benchmarks/gr.cpp delete mode 100644 
legacy/benchmarks/sr-mink.cpp delete mode 100644 legacy/benchmarks/sr-sph.cpp delete mode 100644 legacy/cmake/config.cmake delete mode 100644 legacy/configure.py delete mode 100644 legacy/deploy/aux/argparse.sh delete mode 100644 legacy/deploy/aux/aux.sh delete mode 100644 legacy/deploy/aux/config.sh delete mode 100644 legacy/deploy/aux/default.sh delete mode 100644 legacy/deploy/aux/globals.sh delete mode 100644 legacy/deploy/aux/run.sh delete mode 100644 legacy/deploy/compile_adios2.sh delete mode 100644 legacy/deploy/compile_hdf5.sh delete mode 100644 legacy/deploy/compile_kokkos.sh delete mode 100644 legacy/deploy/compile_ompi.sh delete mode 100644 legacy/deploy/compile_ucx.sh delete mode 100644 legacy/deploy/deploy.py delete mode 100644 legacy/deploy/personal.toml delete mode 100644 legacy/deploy/stellar.toml delete mode 100644 legacy/deploy/zaratan.toml delete mode 100644 legacy/src/framework/digital_filters.hpp delete mode 100644 legacy/src/framework/io/output_csv.cpp delete mode 100644 legacy/src/framework/io/output_csv.h delete mode 100644 legacy/src/framework/ks_phys_units.h delete mode 100644 legacy/src/framework/metrics/kerr_schild_nomass.h delete mode 100644 legacy/src/framework/utils/current_filter.cpp delete mode 100644 legacy/src/framework/utils/current_filter.hpp delete mode 100644 legacy/src/framework/utils/particle_injectors.hpp delete mode 100644 legacy/src/framework/utils/timer.cpp delete mode 100644 legacy/src/framework/utils/timer.h delete mode 100644 legacy/src/framework/writer.cpp delete mode 100644 legacy/src/grpic/boundaries/fields_bc.cpp delete mode 100644 legacy/src/nttiny.cpp delete mode 100644 legacy/src/particle_pusher_sr.hpp delete mode 100644 legacy/src/pic/boundaries/currents_bc.cpp delete mode 100644 legacy/src/pic/boundaries/currents_bc.hpp delete mode 100644 legacy/src/pic/fields/ampere_curv.hpp delete mode 100644 legacy/src/pic/fields/ampere_mink.hpp delete mode 100644 legacy/src/pic/fields/faraday_curv.hpp delete mode 100644 
legacy/src/pic/fields/faraday_mink.hpp delete mode 100644 legacy/src/pic/particles/particle_pusher.hpp delete mode 100644 legacy/src/pic/pgen/old/debug.cpp delete mode 100644 legacy/src/pic/pgen/old/debug.hpp delete mode 100644 legacy/src/pic/pgen/old/deposit.cpp delete mode 100644 legacy/src/pic/pgen/old/deposit.hpp delete mode 100644 legacy/src/pic/pgen/old/em.cpp delete mode 100644 legacy/src/pic/pgen/old/em.hpp delete mode 100644 legacy/src/pic/pgen/old/magnetosphere.hpp delete mode 100644 legacy/src/pic/pgen/old/oneprtl.cpp delete mode 100644 legacy/src/pic/pgen/old/oneprtl.hpp delete mode 100644 legacy/src/pic/pgen/old/oneprtl_sph.cpp delete mode 100644 legacy/src/pic/pgen/old/oneprtl_sph.hpp delete mode 100644 legacy/tests/TODO_CMakeLists.txt delete mode 100644 legacy/tests/deposit.cpp delete mode 100644 legacy/tests/kernels-gr.cpp delete mode 100644 legacy/tests/kernels-sr.cpp delete mode 100644 legacy/tests/pusher-sr-mink.cpp delete mode 100644 legacy/tests/utils-comm.cpp delete mode 100644 legacy/tests/utils-metadomain.cpp delete mode 100644 legacy/tests/utils-writer.cpp diff --git a/legacy/Makefile.in b/legacy/Makefile.in deleted file mode 100644 index 70d9da31a..000000000 --- a/legacy/Makefile.in +++ /dev/null @@ -1,174 +0,0 @@ -# # # # # Directories # # # # # # # # # # -# -ROOT_DIR := $(realpath ${CURDIR}/..) 
-# directory for the building -BUILD_DIR := ${ROOT_DIR}/@BUILD_DIR@ -# directory for the executable -BIN_DIR := ${ROOT_DIR}/@BIN_DIR@ -TEMP_DIR := .temp - -TARGET := @NTT_TARGET@ -TEST_TARGET := @TEST_TARGET@ - -PGEN_DIR := ${ROOT_DIR}/@PGEN_DIR@ - -PGEN := @PGEN@ - -SRC_DIR := ${ROOT_DIR}/@SRC_DIR@ -BUILD_SRC_DIR := @SRC_DIR@ - -# external libraries -EXT_DIR := ${ROOT_DIR}/@EXTERN_DIR@ - -# # # # # Settings # # # # # # # # # # # # -# -DEBUGMODE := @DEBUGMODE@ -VERBOSE := @VERBOSE@ - -DEFINITIONS := @DEFINITIONS@ - -ifeq ($(strip ${VERBOSE}), y) - HIDE = - PREPFLAGS = -DVERBOSE -Werror -else - HIDE = @ -endif - -# 3-rd party library configurations -KOKKOS_PATH := ${EXT_DIR}/kokkos -KOKKOS_BUILD_DIR = ${BUILD_DIR}/kokkos/ - -KOKKOS_ARCH := @KOKKOS_ARCH@ -KOKKOS_DEVICES := @KOKKOS_DEVICES@ -KOKKOS_OPTIONS := @KOKKOS_OPTIONS@ - -KOKKOS_CUDA_OPTIONS := @KOKKOS_CUDA_OPTIONS@ -KOKKOS_CXX_STANDARD := @CXXSTANDARD@ - -PREPFLAGS := ${PREPFLAGS} - -# # # # # Compiler and flags # # # # # # # -# -CXX := @COMPILER@ -HOST_CXX := @HOST_COMPILER@ -LINK := ${CXX} -CXXSTANDARD := -std=@CXXSTANDARD@ -ifeq ($(strip ${DEBUGMODE}), n) - # linker configuration flags (e.g. 
optimization level) - CFLAGS := @RELEASE_CFLAGS@ -else - CFLAGS := @DEBUG_CFLAGS@ -endif - -# warning flags -WARNFLAGS := @WARNING_FLAGS@ - -# custom preprocessor flags -PREPFLAGS := $(PREPFLAGS) @PRECISION@ -D@METRIC@_METRIC -D@SIMTYPE@_SIMTYPE - -CFLAGS := $(CFLAGS) $(WARNFLAGS) $(PREPFLAGS) -LIBS := -lstdc++fs - -# # # # # Targets # # # # # # # # # # # # # # -# -.PHONY: help ntt demo clean cleanlib cleanall pgenCopy - -default: help demo - -# linking the main app -ntt : pgenCopy ${BIN_DIR}/${TARGET} - @echo [M]aking $@ - -ifeq (${PGEN},) -PGEN := dummy -endif - -# Problem generator -pgenCopy: ${SRC_DIR}/${TEMP_DIR}/problem_generator.cpp ${SRC_DIR}/${TEMP_DIR}/problem_generator.hpp - -${SRC_DIR}/${TEMP_DIR}/problem_generator.cpp : ${PGEN_DIR}/${PGEN}.cpp - $(HIDE)mkdir -p ${SRC_DIR}/${TEMP_DIR} - $(HIDE)cp $< $@ - -${SRC_DIR}/${TEMP_DIR}/problem_generator.hpp : ${PGEN_DIR}/${PGEN}.hpp - $(HIDE)mkdir -p ${SRC_DIR}/${TEMP_DIR} - $(HIDE)cp $< $@ - -help: - @echo - @echo "usage: \`make [ ntt | vis | demo ]\`" - @echo - @echo "cleanup: \`make [ clean | cleanlib | cleanall ]\`" - @echo - -demo: - @echo "[C]ompile command:" - @echo ${compile_command} -c \<.cpp\> -o \<.o\> - @echo - @echo "[L]ink command:" - @echo ${link_command} \<.o\> $(LIBS) -o \ - -# # # # # File collection # # # # # # # # # # # -# -# Src files -simtype := @SIMTYPE@ -simtype := $(shell echo $(simtype) | tr A-Z a-z) -SRCS := $(wildcard ${SRC_DIR}/*.cpp ${SRC_DIR}/*.c) -SRCS := $(SRCS) ${SRC_DIR}/${TEMP_DIR}/problem_generator.cpp -SRCS := $(SRCS) $(shell @FIND@ ${SRC_DIR}/framework -name "*.cpp" -o -name "*.c") -SRCS := $(SRCS) $(shell @FIND@ ${SRC_DIR}/${simtype} -name "*.cpp" -o -name "*.c") -SRCS := $(filter-out ${SRC_DIR}/main.cpp, $(SRCS)) -OBJS := $(subst ${SRC_DIR},${BUILD_SRC_DIR},$(SRCS:%=%.o)) -DEPS := $(OBJS:.o=.d) - -# Main app -MAIN_SRCS := ${SRC_DIR}/main.cpp -MAIN_OBJS := $(subst ${SRC_DIR},${BUILD_SRC_DIR},$(MAIN_SRCS:%=%.o)) -MAIN_DEPS := $(MAIN_OBJS:.o=.d) - -INC_DIRS := $(shell 
@FIND@ ${SRC_DIR} -type d) ${SRC_DIR}/${TEMP_DIR} ${EXT_DIR}/plog/include ${EXT_DIR}/doctest/doctest ${EXT_DIR} ${EXT_DIR}/rapidcsv/src -INCFLAGS := $(addprefix -I,${INC_DIRS}) - -# # # # # Link/compile # # # # # # # # # # # # # # - -include ${KOKKOS_PATH}/Makefile.kokkos -OBJS := $(OBJS) $(KOKKOS_LINK_DEPENDS) -CFLAGS := $(CFLAGS) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) -LDFLAGS := $(LDFLAGS) $(KOKKOS_LDFLAGS) -LIBS := $(LIBS) $(KOKKOS_LIBS) -CFLAGS := $(filter-out ${CXXSTANDARD}, $(CFLAGS)) - -compile_command := ${CXX} ${CXXSTANDARD} $(INCFLAGS) $(DEFINITIONS) $(CFLAGS) -MMD -link_command := ${LINK} $(LDFLAGS) - -${BIN_DIR}/${TARGET} : $(MAIN_OBJS) $(OBJS) - @echo [L]inking $(notdir $@) from $< - $(HIDE)mkdir -p ${BIN_DIR} - $(HIDE)${link_command} $^ -o $@ $(LIBS) - -${BUILD_SRC_DIR}/%.o : ${SRC_DIR}/% - @echo [C]ompiling \`src\`: $(subst ${ROOT_DIR}/,,$<) - $(HIDE)mkdir -p $(dir $@) - $(HIDE)${compile_command} -c $< -o $@ - -include ${ROOT_DIR}/Tests.mk -include ${ROOT_DIR}/Docs.mk - -# to ensure recompilation when header files are changed --include $(DEPS) $(MAIN_DEPS) - -# for nttiny /> -NTTINY_DIR := @NTTINY_DIR@ -VIS_DIR := ${ROOT_DIR}/@VIS_DIR@ -include ${ROOT_DIR}/Nttiny.mk -# - struct InitFields { - InitFields(real_t bsurf, real_t rstar) : Bsurf { bsurf }, Rstar { rstar } {} - - Inline auto bx1(const coord_t& x_Ph) const -> real_t { - return Bsurf * SQR(Rstar / x_Ph[0]); - } - - private: - const real_t Bsurf, Rstar; - }; - - template - struct DriveFields : public InitFields { - DriveFields(real_t time, real_t bsurf, real_t rstar, real_t omega) - : InitFields { bsurf, rstar } - , time { time } - , Omega { omega } {} - - using InitFields::bx1; - - Inline auto bx2(const coord_t&) const -> real_t { - return ZERO; - } - - Inline auto bx3(const coord_t&) const -> real_t { - return ZERO; - } - - Inline auto ex1(const coord_t& x_Ph) const -> real_t { - return ZERO; - } - - Inline auto ex2(const coord_t& x_Ph) const -> real_t { - return -Omega * bx1(x_Ph) * x_Ph[0] 
* math::sin(x_Ph[1]); - } - - Inline auto ex3(const coord_t&) const -> real_t { - return ZERO; - } - - private: - const real_t time, Omega; - }; - - template - struct PGen : public arch::ProblemGenerator { - // compatibility traits for the problem generator - static constexpr auto engines { traits::compatible_with::value }; - static constexpr auto metrics { - traits::compatible_with::value - }; - static constexpr auto dimensions { traits::compatible_with::value }; - - // for easy access to variables in the child class - using arch::ProblemGenerator::D; - using arch::ProblemGenerator::C; - using arch::ProblemGenerator::params; - - const real_t Bsurf, Rstar, Omega; - InitFields init_flds; - - inline PGen(const SimulationParams& p, const Metadomain& m) - : arch::ProblemGenerator(p) - , Bsurf { p.template get("setup.Bsurf", ONE) } - , Rstar { m.mesh().extent(in::x1).first } - , Omega { static_cast(constant::TWO_PI) / - p.template get("setup.period", ONE) } - , init_flds { Bsurf, Rstar } {} - - inline PGen() {} - - auto AtmFields(real_t time) const -> DriveFields { - return DriveFields { time, Bsurf, Rstar, Omega }; - } - }; - -} // namespace user - -#endif diff --git a/legacy/benchmark.cpp b/legacy/benchmark.cpp deleted file mode 100644 index 54fc17cf9..000000000 --- a/legacy/benchmark.cpp +++ /dev/null @@ -1,273 +0,0 @@ -#include "enums.h" -#include "global.h" - -#include "utils/error.h" - -#include "metrics/metric_base.h" -#include "metrics/minkowski.h" - -#include "framework/containers/species.h" -#include "framework/domain/domain.h" -#include "framework/domain/metadomain.h" - -#include - -#include "framework/domain/communications.cpp" -#include "mpi.h" -#include "mpi-ext.h" - -#define TIMER_START(label) \ - Kokkos::fence(); \ - auto start_##label = std::chrono::high_resolution_clock::now(); - -#define TIMER_STOP(label) \ - Kokkos::fence(); \ - auto stop_##label = std::chrono::high_resolution_clock::now(); \ - auto duration_##label = std::chrono::duration_cast( \ - 
stop_##label - start_##label) \ - .count(); \ - std::cout << "Timer [" #label "]: " << duration_##label << " microseconds" \ - << std::endl; - -/* - Test to check the performance of the new particle allocation scheme - - Create a metadomain object main() - - Set npart + initialize tags InitializeParticleArrays() - - 'Push' the particles by randomly updating the tags PushParticles() - - Communicate particles to neighbors and time the communication - - Compute the time taken for best of N iterations for the communication - */ -using namespace ntt; - -// Set npart and set the particle tags to alive -template -void InitializeParticleArrays(Domain& domain, const int npart) { - raise::ErrorIf(npart > domain.species[0].maxnpart(), - "Npart cannot be greater than maxnpart", - HERE); - const auto nspecies = domain.species.size(); - for (int i_spec = 0; i_spec < nspecies; i_spec++) { - domain.species[i_spec].set_npart(npart); - domain.species[i_spec].SyncHostDevice(); - auto& this_tag = domain.species[i_spec].tag; - Kokkos::parallel_for( - "Initialize particles", - npart, - Lambda(const std::size_t i) { this_tag(i) = ParticleTag::alive; }); - } - return; -} - -// Randomly reassign tags to particles for a fraction of particles -template -void PushParticles(Domain& domain, - const double send_frac, - const int seed_ind, - const int seed_tag) { - raise::ErrorIf(send_frac > 1.0, "send_frac cannot be greater than 1.0", HERE); - const auto nspecies = domain.species.size(); - for (int i_spec = 0; i_spec < nspecies; i_spec++) { - domain.species[i_spec].set_unsorted(); - const auto nparticles = domain.species[i_spec].npart(); - const auto nparticles_to_send = static_cast(send_frac * nparticles); - // Generate random indices to send - // Kokkos::Random_XorShift64_Pool<> random_pool(seed_ind); - Kokkos::View indices_to_send("indices_to_send", nparticles_to_send); - Kokkos::fill_random(indices_to_send, domain.random_pool, 0, nparticles); - // Generate random tags to send - // 
Kokkos::Random_XorShift64_Pool<> random_pool_tag(seed_tag); - Kokkos::View tags_to_send("tags_to_send", nparticles_to_send); - Kokkos::fill_random(tags_to_send, - domain.random_pool, - 0, - domain.species[i_spec].ntags()); - auto& this_tag = domain.species[i_spec].tag; - Kokkos::parallel_for( - "Push particles", - nparticles_to_send, - Lambda(const std::size_t i) { - auto prtl_to_send = indices_to_send(i); - auto tag_to_send = tags_to_send(i); - this_tag(prtl_to_send) = tag_to_send; - }); - domain.species[i_spec].npart_per_tag(); - domain.species[i_spec].SyncHostDevice(); - } - return; -} - -auto main(int argc, char* argv[]) -> int { - GlobalInitialize(argc, argv); - { - /* - MPI checks - */ - printf("Compile time check:\n"); -#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT - printf("This MPI library has CUDA-aware support.\n", MPIX_CUDA_AWARE_SUPPORT); -#elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT - printf("This MPI library does not have CUDA-aware support.\n"); -#else - printf("This MPI library cannot determine if there is CUDA-aware support.\n"); -#endif /* MPIX_CUDA_AWARE_SUPPORT */ -printf("Run time check:\n"); -#if defined(MPIX_CUDA_AWARE_SUPPORT) - if (1 == MPIX_Query_cuda_support()) { - printf("This MPI library has CUDA-aware support.\n"); - } else { - printf("This MPI library does not have CUDA-aware support.\n"); - } -#else /* !defined(MPIX_CUDA_AWARE_SUPPORT) */ - printf("This MPI library cannot determine if there is CUDA-aware support.\n"); -#endif /* MPIX_CUDA_AWARE_SUPPORT */ - - /* - Test to send and receive Kokkos arrays - */ - int sender_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &sender_rank); - - int neighbor_rank = 0; - if (sender_rank == 0) { - neighbor_rank = 1; - } - else if (sender_rank == 1) { - neighbor_rank = 0; - } - else { - raise::Error("This test is only for 2 ranks", HERE); - } - Kokkos::View send_array("send_array", 10); - Kokkos::View recv_array("recv_array", 10); - if (sender_rank == 0) { - 
Kokkos::deep_copy(send_array, 10); - } - else { - Kokkos::deep_copy(send_array, 20); - } - - auto send_array_host = Kokkos::create_mirror_view(send_array); - Kokkos::deep_copy(send_array_host, send_array); - auto host_recv_array = Kokkos::create_mirror_view(recv_array); - - MPI_Sendrecv(send_array.data(), send_array.extent(0), MPI_INT, neighbor_rank, 0, - recv_array.data(), recv_array.extent(0), MPI_INT, neighbor_rank, 0, - MPI_COMM_WORLD, MPI_STATUS_IGNORE); - - // Print the received array - Kokkos::deep_copy(host_recv_array, recv_array); - for (int i = 0; i < 10; ++i) { - printf("Rank %d: Received %d\n", sender_rank, host_recv_array(i)); - } - - - std::cout << "Constructing the domain" << std::endl; - // Create a Metadomain object - const unsigned int ndomains = 2; - const std::vector global_decomposition = { - {-1, -1, -1} - }; - const std::vector global_ncells = { 32, 32, 32 }; - const boundaries_t global_extent = { - {0.0, 3.0}, - {0.0, 3.0}, - {0.0, 3.0} - }; - const boundaries_t global_flds_bc = { - {FldsBC::PERIODIC, FldsBC::PERIODIC}, - {FldsBC::PERIODIC, FldsBC::PERIODIC}, - {FldsBC::PERIODIC, FldsBC::PERIODIC} - }; - const boundaries_t global_prtl_bc = { - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC}, - {PrtlBC::PERIODIC, PrtlBC::PERIODIC} - }; - const std::map metric_params = {}; - const int maxnpart = argc > 1 ? 
std::stoi(argv[1]) : 1000; - const double npart_to_send_frac = 0.01; - const int npart = static_cast(maxnpart * (1 - 2 * npart_to_send_frac)); - auto species = ntt::ParticleSpecies(1u, - "test_e", - 1.0f, - 1.0f, - maxnpart, - ntt::PrtlPusher::BORIS, - false, - ntt::Cooling::NONE); - auto metadomain = Metadomain>( - ndomains, - global_decomposition, - global_ncells, - global_extent, - global_flds_bc, - global_prtl_bc, - metric_params, - { species }); - - const auto local_subdomain_idx = metadomain.l_subdomain_indices()[0]; - auto local_domain = metadomain.subdomain_ptr(local_subdomain_idx); - auto timers = timer::Timers { { "Communication" }, nullptr, false }; - InitializeParticleArrays(*local_domain, npart); - // Timers for both the communication routines - auto total_time_elapsed_old = 0; - auto total_time_elapsed_new = 0; - - int seed_ind = 0; - int seed_tag = 1; - Kokkos::fence(); - - for (int i = 0; i < 10; ++i) { - { - // Push - seed_ind += 2; - seed_tag += 3; - PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); - // Sort new - Kokkos::fence(); - auto start_new = std::chrono::high_resolution_clock::now(); - metadomain.CommunicateParticlesBuffer(*local_domain, &timers); - auto stop_new = std::chrono::high_resolution_clock::now(); - auto duration_new = std::chrono::duration_cast( - stop_new - start_new) - .count(); - total_time_elapsed_new += duration_new; - Kokkos::fence(); - } - { - // Push - seed_ind += 2; - seed_tag += 3; - PushParticles(*local_domain, npart_to_send_frac, seed_ind, seed_tag); - // Sort old - Kokkos::fence(); - auto start_old = std::chrono::high_resolution_clock::now(); - metadomain.CommunicateParticles(*local_domain, &timers); - auto stop_old = std::chrono::high_resolution_clock::now(); - auto duration_old = std::chrono::duration_cast( - stop_old - start_old) - .count(); - total_time_elapsed_old += duration_old; - Kokkos::fence(); - } - } - printf("Total time elapsed for old: %f us : %f us/prtl\n", - 
total_time_elapsed_old / 10.0, - total_time_elapsed_old / 10.0 * 1000 / npart); - printf("Total time elapsed for new: %f us : %f us/prtl\n", - total_time_elapsed_new / 10.0, - total_time_elapsed_new / 10.0 * 1000 / npart); - } - GlobalFinalize(); - return 0; -} - -/* - Buggy behavior: - Consider a single domain with a single mpi rank - Particle tag arrays is set to [0, 0, 1, 1, 2, 3, ...] for a single domain - CommunicateParticles() discounts all the dead particles and reassigns the - other tags to alive - CommunicateParticlesBuffer() only keeps the ParticleTag::Alive particles - and discounts the rest -*/ diff --git a/legacy/benchmarks/CMakeLists.txt b/legacy/benchmarks/CMakeLists.txt deleted file mode 100644 index 257a0f0ea..000000000 --- a/legacy/benchmarks/CMakeLists.txt +++ /dev/null @@ -1,61 +0,0 @@ -set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../src) - -# include main source directory for all targets -include_directories(${SOURCE_DIR}) - -# --------------------------------- Wrapper -------------------------------- # -set(WRAPPER ${PROJECT_NAME}-wrapper) -add_library(${WRAPPER} STATIC ${SOURCE_DIR}/wrapper/kokkos.cpp) - -# link wrapper with all targets -link_libraries(${WRAPPER}) - -# include wrapper header for all targets -include_directories(${SOURCE_DIR}/wrapper) - -# -------------------------------- Framework ------------------------------- # -file(GLOB_RECURSE FRAMEWORK_FILES ${SOURCE_DIR}/framework/*.cpp) -file(GLOB_RECURSE PIC_FILES ${SOURCE_DIR}/pic/*.cpp) -file(GLOB_RECURSE GRPIC_FILES ${SOURCE_DIR}/grpic/*.cpp) - -# include framework headers for all targets -include_directories(${SOURCE_DIR}/framework) - -set(all_metrics ${sr_metrics} ${gr_metrics}) - -# Libraries for all metrics and engines -# compile framework for all metrics and engines + all engines with corresponding metrics -foreach(metric ${all_metrics}) - list(FIND sr_metrics ${metric} sr_metric_index) - - if(NOT ${sr_metric_index} EQUAL -1) - set(engine pic) - else() - set(engine grpic) - 
endif() - - string(TOUPPER ${metric} metric_upper) - string(TOUPPER ${engine} engine_upper) - - add_library(framework-${metric} STATIC ${FRAMEWORK_FILES}) - target_compile_options(framework-${metric} PUBLIC -D${metric_upper}_METRIC -D${engine_upper}_ENGINE -DSIMULATION_METRIC=\"${metric}\" -DMETRIC_HEADER=\"metrics/${metric}.h\") - - add_library(engine-${engine}-${metric} STATIC ${${engine_upper}_FILES}) - - target_compile_options(engine-${engine}-${metric} PUBLIC -D${metric_upper}_METRIC -D${engine_upper}_ENGINE -DSIMULATION_METRIC=\"${metric}\" -DMETRIC_HEADER=\"metrics/${metric}.h\") - target_compile_options(engine-${engine}-${metric} PUBLIC "-DPGEN_HEADER=\"pgen/dummy.hpp\"") - target_include_directories(engine-${engine}-${metric} PRIVATE ${SOURCE_DIR}/${engine}) -endforeach() - -# ----------------------------------- Benchmarks ----------------------------------- # -function(define_benchmark metric engine title file) - add_executable(bmark-${title}.xc ${file}) - target_link_libraries(bmark-${title}.xc PUBLIC framework-${metric} engine-${engine}-${metric}) - target_include_directories(bmark-${title}.xc PRIVATE ${SOURCE_DIR}/${engine}) -endfunction() - -define_benchmark("minkowski" "pic" "sr-mink" "sr-mink.cpp") -define_benchmark("spherical" "pic" "sr-sph" "sr-sph.cpp") -define_benchmark("qspherical" "pic" "sr-qsph" "sr-sph.cpp") -define_benchmark("kerr_schild" "grpic" "gr-ks" "gr.cpp") -define_benchmark("qkerr_schild" "grpic" "gr-qks" "gr.cpp") \ No newline at end of file diff --git a/legacy/benchmarks/gr.cpp b/legacy/benchmarks/gr.cpp deleted file mode 100644 index b00603603..000000000 --- a/legacy/benchmarks/gr.cpp +++ /dev/null @@ -1,144 +0,0 @@ -#include "wrapper.h" - -#include "field_macros.h" -#include "grpic.h" -#include "sim_params.h" - -#include "io/cargs.h" -#include "io/input.h" -#include "meshblock/meshblock.h" - -#include "utilities/archetypes.hpp" -#include "utilities/injector.hpp" - -#include - -#include -#include -#include -#include -#include - 
-Inline void EMfield(const ntt::coord_t& x_ph, - ntt::vec_t& d_out, - ntt::vec_t& b_out, - const real_t sx1, - const real_t sx2) { - const real_t kx1_x1 = ntt::constant::TWO_PI * x_ph[0] / sx1; - const real_t kx2_x2 = ntt::constant::TWO_PI * x_ph[1] / sx2; - d_out[0] = math::cos(kx1_x1) * math::sin(kx2_x2); - d_out[1] = -math::sin(kx1_x1) * math::cos(kx2_x2); - d_out[2] = math::cos(kx1_x1) * math::cos(kx2_x2); - b_out[0] = math::sin(kx1_x1) * math::cos(kx2_x2); - b_out[1] = -math::cos(kx1_x1) * math::sin(kx2_x2); - b_out[2] = math::sin(kx1_x1) * math::sin(kx2_x2); -} - -template -struct MaxwellianDist : public ntt::EnergyDistribution { - MaxwellianDist(const ntt::SimulationParams& params, const ntt::Meshblock& mblock) - : ntt::EnergyDistribution(params, mblock), - maxwellian { mblock }, - temperature { 0.001 } {} - Inline void operator()(const ntt::coord_t&, - ntt::vec_t& v, - const int& species) const override { - maxwellian(v, temperature); - } - -private: - const ntt::Maxwellian maxwellian; - const real_t temperature; -}; - -using namespace toml::literals::toml_literals; -const auto default_input { - R"( - [domain] - resolution = [8192, 8192] - extent = [1.0, 50.0] - boundaries = [["OPEN", "ABSORB"], ["AXIS"]] - sph_rabsorb = 45.0 - qsph_r0 = 0.0 - qsph_h = 0.0 - a = 0.95 - - [units] - ppc0 = 1.0 - larmor0 = 2.0 - skindepth0 = 1.0 - - [particles] - n_species = 2 - - [species_1] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e8 - - [species_2] - label = "e+" - mass = 25.0 - charge = 1.0 - maxnpart = 1e8 - - [diagnostics] - blocking_timers = true - )"_toml -}; - -auto main(int argc, char* argv[]) -> int { - ntt::GlobalInitialize(argc, argv); - try { - ntt::CommandLineArguments cl_args; - cl_args.readCommandLineArguments(argc, argv); - toml::value inputdata; - - auto n_iter_str = cl_args.getArgument("-niter", "10"); - auto n_iter = std::stoi(std::string(n_iter_str)); - - if (cl_args.isSpecified("-input")) { - auto inputfilename = 
cl_args.getArgument("-input"); - inputdata = toml::parse(static_cast(inputfilename)); - } else { - inputdata = default_input; - } - auto sim = ntt::GRPIC(inputdata); - - auto params = *(sim.params()); - auto& mblock = sim.meshblock; - - { - const auto extent = params.extent(); - - sim.ResetSimulation(); - using namespace ntt; - const real_t sx1 = extent[1] - extent[0]; - const real_t sx2 = extent[3] - extent[2]; - Kokkos::parallel_for( - "InitFields", mblock.rangeActiveCells(), Lambda(ntt::index_t i1, ntt::index_t i2) { - set_em_fields_2d(mblock, i1, i2, EMfield, sx1, sx2); - }); - sim.Exchange(ntt::GhostCells::fields); - - ntt::InjectUniform( - params, sim.meshblock, { 1, 2 }, params.ppc0() * 0.5); - } - { - ntt::WaitAndSynchronize(); - - for (auto i { 0 }; i < n_iter; ++i) { - sim.StepForward(ntt::DiagFlags_Timers | ntt::DiagFlags_Species); - } - } - - } catch (std::exception& err) { - std::cerr << err.what() << std::endl; - ntt::GlobalFinalize(); - return -1; - } - ntt::GlobalFinalize(); - - return 0; -} \ No newline at end of file diff --git a/legacy/benchmarks/sr-mink.cpp b/legacy/benchmarks/sr-mink.cpp deleted file mode 100644 index 7089d7e98..000000000 --- a/legacy/benchmarks/sr-mink.cpp +++ /dev/null @@ -1,176 +0,0 @@ -#include "wrapper.h" - -#include "field_macros.h" -#include "pic.h" -#include "sim_params.h" - -#include "io/cargs.h" -#include "io/input.h" -#include "meshblock/meshblock.h" - -#include "utilities/archetypes.hpp" -#include "utilities/injector.hpp" - -#include - -#include -#include -#include -#include -#include - -Inline void EMfield_2d(const ntt::coord_t& x_ph, - ntt::vec_t& e_out, - ntt::vec_t& b_out, - const real_t sx1, - const real_t sx2) { - const real_t kx1_x1 = ntt::constant::TWO_PI * x_ph[0] / sx1; - const real_t kx2_x2 = ntt::constant::TWO_PI * x_ph[1] / sx2; - e_out[0] = math::cos(kx1_x1) * math::sin(kx2_x2); - e_out[1] = -math::sin(kx1_x1) * math::cos(kx2_x2); - e_out[2] = math::cos(kx1_x1) * math::cos(kx2_x2); - b_out[0] = 
math::sin(kx1_x1) * math::cos(kx2_x2); - b_out[1] = -math::cos(kx1_x1) * math::sin(kx2_x2); - b_out[2] = math::sin(kx1_x1) * math::sin(kx2_x2); -} - -Inline void EMfield_3d(const ntt::coord_t& x_ph, - ntt::vec_t& e_out, - ntt::vec_t& b_out, - const real_t sx1, - const real_t sx2, - const real_t) { - const real_t kx1_x1 = ntt::constant::TWO_PI * x_ph[0] / sx1; - const real_t kx2_x2 = ntt::constant::TWO_PI * x_ph[1] / sx2; - e_out[0] = math::cos(kx1_x1) * math::sin(kx2_x2); - e_out[1] = -math::sin(kx1_x1) * math::cos(kx2_x2); - e_out[2] = math::cos(kx1_x1) * math::cos(kx2_x2); - b_out[0] = math::sin(kx1_x1) * math::cos(kx2_x2); - b_out[1] = -math::cos(kx1_x1) * math::sin(kx2_x2); - b_out[2] = math::sin(kx1_x1) * math::sin(kx2_x2); -} - -using namespace toml::literals::toml_literals; -const auto default_input { - R"( - [domain] - resolution = [8192, 8192] - extent = [-5.0, 5.0, -5.0, 5.0] - boundaries = [["PERIODIC"], ["PERIODIC"]] - - [algorithm] - cfl = 0.0001 - - [units] - ppc0 = 2.0 - larmor0 = 2.0 - skindepth0 = 1.0 - - [particles] - n_species = 2 - - [species_1] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e8 - - [species_2] - label = "e+" - mass = 25.0 - charge = 1.0 - maxnpart = 1e8 - - [diagnostics] - blocking_timers = true - )"_toml -}; - -template -struct MaxwellianDist : public ntt::EnergyDistribution { - MaxwellianDist(const ntt::SimulationParams& params, const ntt::Meshblock& mblock) - : ntt::EnergyDistribution(params, mblock), - maxwellian { mblock }, - temperature { ONE } {} - Inline void operator()(const ntt::coord_t&, - ntt::vec_t& v, - const int& species) const override { - maxwellian(v, temperature); - } - -private: - const ntt::Maxwellian maxwellian; - const real_t temperature; -}; - -template -auto Run(const toml::value input, const int n_iter) -> void { - using namespace ntt; - auto sim = PIC(input); - - auto params = *(sim.params()); - auto& mblock = sim.meshblock; - - { - auto extent = params.extent(); - sim.ResetSimulation(); - 
const real_t sx1 = extent[1] - extent[0]; - const real_t sx2 = extent[3] - extent[2]; - if constexpr (D == Dim2) { - Kokkos::parallel_for( - "InitFields", mblock.rangeActiveCells(), Lambda(index_t i1, index_t i2) { - set_em_fields_2d(mblock, i1, i2, EMfield_2d, sx1, sx2); - }); - } else if constexpr (D == Dim3) { - const real_t sx3 = extent[5] - extent[4]; - Kokkos::parallel_for( - "InitFields", mblock.rangeActiveCells(), Lambda(index_t i1, index_t i2, index_t i3) { - set_em_fields_3d(mblock, i1, i2, i3, EMfield_3d, sx1, sx2, sx3); - }); - } - sim.Exchange(ntt::GhostCells::fields); - - ntt::InjectUniform( - params, sim.meshblock, { 1, 2 }, params.ppc0() * 0.5); - } - { - ntt::WaitAndSynchronize(); - - for (auto i { 0 }; i < n_iter; ++i) { - sim.StepForward(ntt::DiagFlags_Timers | ntt::DiagFlags_Species); - } - } -} - -auto main(int argc, char* argv[]) -> int { - ntt::GlobalInitialize(argc, argv); - try { - ntt::CommandLineArguments cl_args; - cl_args.readCommandLineArguments(argc, argv); - toml::value inputdata; - - auto n_iter_str = cl_args.getArgument("-niter", "10"); - auto n_iter = std::stoi(std::string(n_iter_str)); - - if (cl_args.isSpecified("-input")) { - auto inputfilename = cl_args.getArgument("-input"); - inputdata = toml::parse(static_cast(inputfilename)); - } else { - inputdata = default_input; - } - auto resolution - = ntt::readFromInput>(inputdata, "domain", "resolution"); - if (resolution.size() == 2) { - Run(inputdata, n_iter); - } else { - Run(inputdata, n_iter); - } - } catch (std::exception& err) { - std::cerr << err.what() << std::endl; - ntt::GlobalFinalize(); - return -1; - } - ntt::GlobalFinalize(); - - return 0; -} \ No newline at end of file diff --git a/legacy/benchmarks/sr-sph.cpp b/legacy/benchmarks/sr-sph.cpp deleted file mode 100644 index c1de62f23..000000000 --- a/legacy/benchmarks/sr-sph.cpp +++ /dev/null @@ -1,142 +0,0 @@ -#include "wrapper.h" - -#include "field_macros.h" -#include "pic.h" -#include "sim_params.h" - -#include 
"io/cargs.h" -#include "io/input.h" -#include "meshblock/meshblock.h" - -#include "utilities/archetypes.hpp" -#include "utilities/injector.hpp" - -#include - -#include -#include -#include -#include -#include - -Inline void EMfield(const ntt::coord_t& x_ph, - ntt::vec_t& e_out, - ntt::vec_t& b_out, - const real_t sx1, - const real_t sx2) { - const real_t kx1_x1 = ntt::constant::TWO_PI * x_ph[0] / sx1; - const real_t kx2_x2 = ntt::constant::TWO_PI * x_ph[1] / sx2; - e_out[0] = math::cos(kx1_x1) * math::sin(kx2_x2); - e_out[1] = -math::sin(kx1_x1) * math::cos(kx2_x2); - e_out[2] = math::cos(kx1_x1) * math::cos(kx2_x2); - b_out[0] = math::sin(kx1_x1) * math::cos(kx2_x2); - b_out[1] = -math::cos(kx1_x1) * math::sin(kx2_x2); - b_out[2] = math::sin(kx1_x1) * math::sin(kx2_x2); -} - -template -struct MaxwellianDist : public ntt::EnergyDistribution { - MaxwellianDist(const ntt::SimulationParams& params, const ntt::Meshblock& mblock) - : ntt::EnergyDistribution(params, mblock), - maxwellian { mblock }, - temperature { 0.001 } {} - Inline void operator()(const ntt::coord_t&, - ntt::vec_t& v, - const int& species) const override { - maxwellian(v, temperature); - } - -private: - const ntt::Maxwellian maxwellian; - const real_t temperature; -}; - -using namespace toml::literals::toml_literals; -const auto default_input { - R"( - [domain] - resolution = [8192, 8192] - extent = [1.0, 50.0] - boundaries = [["CUSTOM", "ABSORB"], ["AXIS"]] - sph_rabsorb = 45.0 - qsph_r0 = 0.0 - qsph_h = 0.0 - - [units] - ppc0 = 1.0 - larmor0 = 2.0 - skindepth0 = 1.0 - - [particles] - n_species = 2 - - [species_1] - label = "e-" - mass = 1.0 - charge = -1.0 - maxnpart = 1e8 - - [species_2] - label = "e+" - mass = 25.0 - charge = 1.0 - maxnpart = 1e8 - - [diagnostics] - blocking_timers = true - )"_toml -}; - -auto main(int argc, char* argv[]) -> int { - ntt::GlobalInitialize(argc, argv); - try { - ntt::CommandLineArguments cl_args; - cl_args.readCommandLineArguments(argc, argv); - toml::value 
inputdata; - - auto n_iter_str = cl_args.getArgument("-niter", "10"); - auto n_iter = std::stoi(std::string(n_iter_str)); - - if (cl_args.isSpecified("-input")) { - auto inputfilename = cl_args.getArgument("-input"); - inputdata = toml::parse(static_cast(inputfilename)); - } else { - inputdata = default_input; - } - auto sim = ntt::PIC(inputdata); - - auto params = *(sim.params()); - auto& mblock = sim.meshblock; - - { - auto extent = params.extent(); - sim.ResetSimulation(); - using namespace ntt; - const real_t sx1 = extent[1] - extent[0]; - const real_t sx2 = extent[3] - extent[2]; - Kokkos::parallel_for( - "InitFields", mblock.rangeActiveCells(), Lambda(ntt::index_t i1, ntt::index_t i2) { - set_em_fields_2d(mblock, i1, i2, EMfield, sx1, sx2); - }); - sim.Exchange(ntt::GhostCells::fields); - - ntt::InjectUniform( - params, sim.meshblock, { 1, 2 }, params.ppc0() * 0.5); - } - { - ntt::WaitAndSynchronize(); - - for (auto i { 0 }; i < n_iter; ++i) { - sim.StepForward(ntt::DiagFlags_Timers | ntt::DiagFlags_Species); - } - } - - } catch (std::exception& err) { - std::cerr << err.what() << std::endl; - ntt::GlobalFinalize(); - return -1; - } - ntt::GlobalFinalize(); - - return 0; -} \ No newline at end of file diff --git a/legacy/cmake/config.cmake b/legacy/cmake/config.cmake deleted file mode 100644 index 304454dd5..000000000 --- a/legacy/cmake/config.cmake +++ /dev/null @@ -1,117 +0,0 @@ -# ----------------------------- Simulation engine ---------------------------- # -# function(set_engine engine_name) -# list(FIND simulation_engines ${engine_name} ENGINE_FOUND) - -# if(${ENGINE_FOUND} EQUAL -1) -# message(FATAL_ERROR "Invalid simulation engine: ${engine_name}\nValid options are: ${simulation_engines}") -# else() -# set(ENGINE_FLAG ${engine}_engine) -# string(TOUPPER ${ENGINE_FLAG} ENGINE_FLAG) -# add_compile_options("-D ${ENGINE_FLAG}") -# endif() - -# if(${engine_name} STREQUAL "sandbox") -# set(default_metric "minkowski" CACHE STRING "Default metric") -# 
set(metrics ${sr_metrics} ${gr_metrics} CACHE STRING "Metrics") -# elseif(${engine_name} STREQUAL "pic") -# set(default_metric "minkowski" CACHE STRING "Default metric") -# set(metrics ${sr_metrics} CACHE STRING "Metrics") -# elseif(${engine_name} STREQUAL "grpic") -# set(default_metric "qkerr_schild" CACHE STRING "Default metric") -# set(metrics ${gr_metrics} CACHE STRING "Metrics") -# endif() -# endfunction() - -# -------------------------------- Precision ------------------------------- # -function(set_precision precision_name) - list(FIND precisions ${precision_name} PRECISION_FOUND) - - if(${PRECISION_FOUND} EQUAL -1) - message(FATAL_ERROR "Invalid precision: ${precision_name}\nValid options are: ${precisions}") - endif() - - if(${precision_name} STREQUAL "single") - add_compile_options("-DSINGLE_PRECISION") - endif() -endfunction() - -# # --------------------------------- Metric --------------------------------- # -# function(set_metric metric_name) -# list(FIND metrics ${metric_name} METRIC_FOUND) - -# if(${METRIC_FOUND} EQUAL -1) -# message(FATAL_ERROR "Invalid metric: ${metric_name}\nValid options are: ${metrics}") -# else() -# set(METRIC_FLAG ${metric}_metric) -# string(TOUPPER ${METRIC_FLAG} METRIC_FLAG) -# set(SIMULATION_METRIC ${metric}) - -# add_compile_options("-D ${METRIC_FLAG}") -# add_compile_options("-D SIMULATION_METRIC=\"${SIMULATION_METRIC}\"") -# add_compile_options("-D METRIC_HEADER=\"metrics/${metric}.h\"") -# endif() -# endfunction() - -# ---------------------------- Problem generator --------------------------- # -function(set_problem_generator pgen_name engine_name) - if(NOT ${engine_name} STREQUAL "sandbox") - if (${engine_name} STREQUAL "pic") - set(ENGINE_DIRECTORY "srpic") - else() - set(ENGINE_DIRECTORY ${engine_name}) - endif() - if (${metric} STREQUAL "minkowski") - set(ENGINE_DIRECTORY ${ENGINE_DIRECTORY}-cart) - else() - set(ENGINE_DIRECTORY ${ENGINE_DIRECTORY}-axisym) - endif() - - set(PGEN_DIRECTORY 
${CMAKE_CURRENT_SOURCE_DIR}/setups/${ENGINE_DIRECTORY}/) - file(GLOB_RECURSE PGENS ${PGEN_DIRECTORY}*.hpp) - set(problem_generators "") - set(problem_generators_full "") - - foreach(pgen_file ${PGENS}) - string(REPLACE ${PGEN_DIRECTORY} "" new_pgen ${pgen_file}) - string(REPLACE ".hpp" "" new_pgen ${new_pgen}) - list(APPEND problem_generators_full ${new_pgen}) - - string(REPLACE "/" ";" new_pgen_list ${new_pgen}) - list(GET new_pgen_list 0 pgen_left) - list(GET new_pgen_list 1 pgen_right) - if (${pgen_left} STREQUAL ${pgen_right}) - set(new_pgen ${pgen_left}) - endif() - - list(APPEND problem_generators ${new_pgen}) - endforeach() - list(APPEND problem_generators "dummy") - - if (${pgen_name} MATCHES "^.*\/.*$") - set(pgen_full ${pgen_name}) - string(REPLACE "/" ";" new_pgen_list ${pgen_name}) - list(GET new_pgen_list 0 pgen_left) - list(GET new_pgen_list 1 pgen_right) - if (${pgen_left} STREQUAL "temp") - list(APPEND problem_generators ${pgen_name}) - set(PGEN_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/) - endif() - elseif (${pgen_name} STREQUAL "dummy") - set(PGEN_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/setups/) - set(pgen_full ${pgen_name}) - else() - set(pgen_full ${pgen_name}/${pgen_name}) - endif() - - set(problem_generators ${problem_generators} CACHE STRING "Problem generators") - - list(FIND problem_generators ${pgen_name} PGEN_FOUND) - - if(${PGEN_FOUND} EQUAL -1) - message(FATAL_ERROR "Problem generator ${pgen_full}.hpp not found\nAvailable problem generators: ${problem_generators}.") - else() - add_compile_options("-D PGEN_HEADER=\"${PGEN_DIRECTORY}${pgen_full}.hpp\"") - set(PGEN_FOUND TRUE CACHE BOOL "Problem generator found") - endif() - endif() -endfunction() \ No newline at end of file diff --git a/legacy/configure.py b/legacy/configure.py deleted file mode 100644 index 54c41ef24..000000000 --- a/legacy/configure.py +++ /dev/null @@ -1,421 +0,0 @@ -# ----------------------------------------------------------------------------------------- -# Configure file 
for the `Entity` code to generate a temporary `Makefile`. -# -# Options: -# -h --help help message -# -# [ Compilation flags ] -# -verbose enable verbose compilation mode -# -debug compile in `debug` mode -# --compiler= compiler used (can be a valid path to the binary) -# --build= specify building directory -# --bin= specify directory for executables -# -# [ Nttiny flags ] -# -nttiny enable visualizer compilation -# --nttiny_path= specify path for `Nttiny` (relative to current dir or absolute) -# -# [ Simulation flags ] -# --pgen= specify the problem generator to be used -# --precision=[single|double] floating point precision used [default: single] -# --metric= select metric to be used [default: minkowski] -# --simtype= select simulation type [default: pic] -# -# [ Kokkos-specific flags ] -# --kokkos_devices= `Kokkos` devices -# --kokkos_arch= `Kokkos` architecture -# --kokkos_options= `Kokkos` options -# --kokkos_cuda_options= `Kokkos` Cuda options -# ---------------------------------------------------------------------------------------- - -import argparse -import glob -import re -import subprocess -import os -import sys -import textwrap -from pathlib import Path - -assert sys.version_info >= (3, 7), "Requires python 3.7 or higher" - -# Global Settings -# --------------- -# Default values: -DEF_build_dir = 'build' -DEF_bin_dir = 'bin' -DEF_compiler = 'g++' -DEF_cppstandard = 'c++17' - -# Set template and output filenames -makefile_input = 'Makefile.in' -makefile_output = 'Makefile' - -# Options: -Precision_options = ['double', 'single'] -Metric_options = ['minkowski', 'spherical', - 'qspherical', 'kerr_schild', 'qkerr_schild'] -Simtype_options = ['pic', 'grpic'] - - -def findFiles(directory, extension): - return glob.glob(directory + '/*/*.' + extension) + glob.glob(directory + '/*.' 
+ extension) - - -Pgen_options = [f.replace('.hpp', '').replace('\\', '/').replace('pgen/', '') - for f in findFiles('pgen', 'hpp')] -Kokkos_devices = dict(host=['Serial', 'OpenMP', 'PThreads'], device=['Cuda']) -Kokkos_arch = dict(host=["AMDAVX", "EPYC", "ARMV80", "ARMV81", "ARMV8_THUNDERX", - "ARMV8_THUNDERX2", "WSM", "SNB", "HSW", "BDW", "SKX", - "KNC", "KNL", "BGQ", "POWER7", "POWER8", "POWER9"], - device=["KEPLER30", "KEPLER32", "KEPLER35", "KEPLER37", - "MAXWELL50", "MAXWELL52", "MAXWELL53", "PASCAL60", - "PASCAL61", "VOLTA70", "VOLTA72", "TURING75", - "AMPERE80", "VEGA900", "VEGA906", "INTEL_GE"]) -Kokkos_devices_options = Kokkos_devices["host"] + Kokkos_devices["device"] -Kokkos_arch_options = Kokkos_arch["host"] + Kokkos_arch["device"] -Kokkos_loop_options = ['default', '1DRange', - 'MDRange', 'TP-TVR', 'TP-TTR', 'TP-TTR-TVR', 'for'] - -# . . . auxiliary functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . --> -use_nvcc_wrapper = False - - -def findCompiler(compiler): - find_command = subprocess.run( - ['which', compiler], capture_output=True, text=True) - return find_command.stdout.strip() if (find_command.returncode == 0) else 'N/A' - - -def pathNotEmpty(path): - ls_path = subprocess.run(['ls', path], capture_output=True, text=True) - return path if (ls_path.returncode == 0) else 'N/A' - - -def defineOptions(): - parser = argparse.ArgumentParser() - # compilation - parser.add_argument('-verbose', action='store_true', - default=False, help='enable verbose compilation mode') - parser.add_argument('--build', default=DEF_build_dir, - help='specify building directory') - parser.add_argument('--bin', default=DEF_bin_dir, - help='specify directory for executables') - parser.add_argument('--compiler', default=DEF_compiler, - help='choose the compiler') - parser.add_argument('-debug', action='store_true', - default=False, help='compile in `debug` mode') - - # visualizer - parser.add_argument('-nttiny', action='store_true', - 
default=False, help='enable nttiny visualizer compilation') - parser.add_argument('--nttiny_path', default="extern/nttiny", - help='specify path for `Nttiny`') - - # simulation - parser.add_argument('--precision', default='single', - choices=Precision_options, help='code precision (default: `single`)') - parser.add_argument( - '--metric', default=Metric_options[0], choices=Metric_options, help='select metric to be used (default: `minkowski`)') - parser.add_argument( - '--simtype', default=Simtype_options[0], choices=Simtype_options, help='select simulation type (default: `pic`)') - parser.add_argument('--pgen', default="", choices=Pgen_options, - help='problem generator to be used (default: `ntt_dummy`)') - - # `Kokkos` specific - parser.add_argument( - '--kokkos_devices', default=Kokkos_devices['host'][0], help='`Kokkos` devices') - parser.add_argument('--kokkos_arch', default='', - help='`Kokkos` architecture') - parser.add_argument('--kokkos_options', default='', - help='`Kokkos` options') - parser.add_argument('--kokkos_cuda_options', default='', - help='`Kokkos` CUDA options') - return vars(parser.parse_args()) - - -def configureKokkos(arg, mopt): - global use_nvcc_wrapper - kokkos_configs = {} - - def parseArchDevice(carg, kokkos_list): - _ = carg.split(',') - assert len(_) <= 2, "Wrong arch/device specified" - if len(_) == 2: - _1, _2 = _ - if _2 in kokkos_list['host']: - _1 = _[1] - _2 = _[0] - return _1, _2 - elif len(_) == 1: - _1 = _[0] - _2 = None - if _1 in kokkos_list['device']: - # enabling openmp if CUDA is enabled - _2 = 'OpenMP' - elif (not (_1 in kokkos_list['host'])): - if _1 != '': - raise ValueError("Wrong arch/device specified") - else: - _1 = None - return _1, _2 - else: - return None, None - host_d, device_d = parseArchDevice(arg['kokkos_devices'], Kokkos_devices) - host_a, device_a = parseArchDevice(arg['kokkos_arch'], Kokkos_arch) - if host_d is not None: - assert (host_d in Kokkos_devices['host']), 'Wrong host' - 
kokkos_configs['devices'] = host_d - if device_d is not None: - assert (device_d in Kokkos_devices['device']), 'Wrong device' - kokkos_configs['devices'] += ',' + device_d - if host_a is not None: - assert (host_a in Kokkos_arch['host']), 'Wrong host architecture' - kokkos_configs['arch'] = host_a - if device_a is not None: - assert (device_a in Kokkos_arch['device']), 'Wrong device architecture' - try: - kokkos_configs['arch'] += ',' + device_a - except: - kokkos_configs['arch'] = device_a - - mopt['KOKKOS_DEVICES'] = kokkos_configs['devices'] - mopt['KOKKOS_ARCH'] = kokkos_configs.get('arch', '') - if 'Cuda' in kokkos_configs['devices']: - mopt['DEFINITIONS'] += '-DGPUENABLED ' - if 'OpenMP' in kokkos_configs['devices']: - mopt['DEFINITIONS'] += '-DOMPENABLED ' - - mopt['KOKKOS_OPTIONS'] = arg['kokkos_options'] - if mopt['KOKKOS_OPTIONS'] != '': - mopt['KOKKOS_OPTIONS'] += ',' - mopt['KOKKOS_OPTIONS'] += 'disable_deprecated_code' - - mopt['KOKKOS_CUDA_OPTIONS'] = arg['kokkos_cuda_options'] - - if 'Cuda' in mopt['KOKKOS_DEVICES']: - # using Cuda - mopt['KOKKOS_CUDA_OPTIONS'] = arg['kokkos_cuda_options'] - if mopt['KOKKOS_CUDA_OPTIONS'] != '': - mopt['KOKKOS_CUDA_OPTIONS'] += ',' - mopt['KOKKOS_CUDA_OPTIONS'] += 'enable_lambda' - - use_nvcc_wrapper = True - - # no MPI (TODO) - arg['nvcc_wrapper_cxx'] = arg['compiler'] - mopt['HOST_COMPILER'] = arg["nvcc_wrapper_cxx"] - mopt['COMPILER'] = f'NVCC_WRAPPER_DEFAULT_COMPILER={arg["nvcc_wrapper_cxx"]} '\ - + '${KOKKOS_PATH}/bin/nvcc_wrapper' - # + 'NVCC_WRAPPER_TMPDIR=${BUILD_DIR}/tmp '\ - # add with MPI here (TODO) - - settings = f''' - `Kokkos`: - {'Devices':30} {mopt['KOKKOS_DEVICES']} - {'Architecture':30} {mopt['KOKKOS_ARCH']} - {'Options':30} {mopt['KOKKOS_OPTIONS'] if mopt['KOKKOS_OPTIONS'] is not None else '-'} - {'Cuda options':30} {mopt['KOKKOS_CUDA_OPTIONS'] if mopt['KOKKOS_CUDA_OPTIONS'] is not None else '-'}''' - return settings - - -def createMakefile(m_in, m_out, mopt): - with open(m_in, 'r') as 
current_file: - makefile_template = current_file.read() - # print(makefile_template) - for key, val in mopt.items(): - makefile_template = makefile_template.replace(f'@{key}@', val) - if not args['nttiny']: - makefile_template = re.sub( - "# for nttiny />[\S\s]*? *', '', - re.sub(r'-[I|L|o|c].+?[ |>|$]', '', - re.sub(r'-([I|D|c|o|W|O|L]) ', r'-\1', - command[i + 1:])) - ).strip().split(' '))) - order = ['-std', '-D', '-W', '-l', '--diag', ''] - accounted_flags = [] - ordered_flags = {key: [] for key in order} - for o in order: - for flag in flags: - if (o in flag) and (not flag in accounted_flags): - accounted_flags.append(flag) - ordered_flags[o].append(re.sub('-D', '-D ', flag)) - fstring = "" - fstring += " " + cmd + "\n" - for o in order: - fstring += " " - for f in ordered_flags[o]: - fstring += f + " " - fstring += "\n" - fstring = "".join(filter(str.strip, fstring.splitlines(True)))[:-1] - return fstring - -# add some useful notes - - -def makeNotes(): - notes = '' - cxx = args['nvcc_wrapper_cxx'] if use_nvcc_wrapper else makefile_options['COMPILER'] - if use_nvcc_wrapper: - notes += f"* nvcc recognized as:\n $ {findCompiler('nvcc')}\n " - notes += f"* {'nvcc wrapper ' if use_nvcc_wrapper else ''}compiler recognized as:\n $ {findCompiler(cxx)}\n " - if 'OpenMP' in args['kokkos_devices']: - notes += f"* when using OpenMP set the following environment variables:\n $ export OMP_PROC_BIND=spread OMP_PLACES=threads\n " - if args['nttiny']: - notes += f"* `nttiny` path:\n $ {pathNotEmpty(args['nttiny_path'])}" - return notes.strip() - - -short_compiler = ( - f"nvcc_wrapper [{args['nvcc_wrapper_cxx']}]" if use_nvcc_wrapper else makefile_options['COMPILER']) - -full_command = " ".join(sys.argv[:]) - -# Finish with diagnostic output -w = 80 -full_command = ' \\\n'.join(textwrap.wrap(full_command, w - 4, - subsequent_indent=" ", - initial_indent=" ")) -report = f''' -{'':=<{w}} - __ __ - /\ \__ __/\ \__ - __ ___\ \ _\/\_\ \ _\ __ __ - / __ \/ _ \ \ \/\/\ \ \ \/ /\ \/\ 
\\ -/\ __//\ \/\ \ \ \_\ \ \ \ \_\ \ \_\ \ __ -\ \____\ \_\ \_\ \__\\\\ \_\ \__\\\\ \____ \/\_\\ - \/____/\/_/\/_/\/__/ \/_/\/__/ \/___/ \/_/ - /\___/ - \/__/ - -{'':=<{w}} -{'Full configure command ':.<{w}} - -{full_command} - -{'Setup configurations ':.<{w}} - - {'Simulation type':32} {args['simtype'].upper()} - {'Problem generator':32} {args['pgen'] if args['pgen'] != '' else '--'} - {'Precision':32} {args['precision']} - {'Metric':32} {args['metric']} - -{'Physics ':.<{w}} - -{'Technical details ':.<{w}} - - {'Compiler':32} {short_compiler} - {'Debug mode':32} {args['debug']} - {Kokkos_details} - -{'Notes ':.<{80}} - - {makeNotes()} - -{'Compilation command ':.<{w}} - -{beautifyCommands(compiledemo)} - -{'Linking command ':.<{w}} - -{beautifyCommands(linkdemo)} - -{'':=<{w}} -''' - -print(report) - -with open(args['build'] + "/REPORT", 'w') as reportfile: - reportfile.write(report) diff --git a/legacy/deploy/aux/argparse.sh b/legacy/deploy/aux/argparse.sh deleted file mode 100644 index a6cb18f0d..000000000 --- a/legacy/deploy/aux/argparse.sh +++ /dev/null @@ -1,45 +0,0 @@ -while [ $# -gt 0 ]; do - if [[ $1 == "--help" ]] || [[ $1 == "-h" ]]; then - usage - exit 0 - elif [[ $1 == "--"* ]]; then - v="${1/--/}" - v=$(echo $v | sed 's/-/_/g') - if [[ -z "$2" ]] || [[ "$2" == --* ]] || [[ "$2" == -* ]]; then - if [[ "$1" != "--deploy" ]] && [[ "$1" != "--verbose" ]]; then - printf "\n${RED}Invalid option: $1${NC}\n" - exit 1 - fi - declare "$v"="ON" - else - declare "$v"="$2" - shift - fi - elif [[ $1 == "-"* ]]; then - if [[ -z "$2" ]] || [[ "$2" == --* ]] || [[ "$2" == -* ]]; then - if [[ $1 == "-v" ]]; then - verbose="ON" - elif [[ $1 == "-d" ]]; then - deploy="ON" - else - printf "\n${RED}Invalid option: $1${NC}\n" - exit 1 - fi - else - printf "\n${RED}Invalid option: $1${NC}\n" - exit 1 - fi - fi - shift -done - -# manage invalid options -if [ $with_cuda = "ON" ]; then - printf "\n${RED}Please specify CUDA path or modulename${NC}\n" - exit 1 -fi - -if [ 
$with_mpi = "ON" ]; then - printf "\n${RED}Please specify MPI path or modulename${NC}\n" - exit 1 -fi \ No newline at end of file diff --git a/legacy/deploy/aux/aux.sh b/legacy/deploy/aux/aux.sh deleted file mode 100644 index 11a238803..000000000 --- a/legacy/deploy/aux/aux.sh +++ /dev/null @@ -1,86 +0,0 @@ -rtouch() { - mkdir -p $(sed 's/\(.*\)\/.*/\1/' <<<$1) && touch $1 -} - -function runcommand { - if [ $deploy = "OFF" ]; then - echo ": $1" - else - if [ $verbose = "ON" ]; then - eval "$1" - else - eval "$1" >>${logfile} 2>>${logfile} - fi - fi -} - -declare -x FRAME -declare -x FRAME_INTERVAL - -set_spinner() { - case $1 in - spinner1) - FRAME=("⠋" "⠙" "⠹" "⠸" "⠼" "⠴" "⠦" "⠧" "⠇" "⠏") - FRAME_INTERVAL=0.1 - ;; - spinner2) - FRAME=("-" "\\" "|" "/") - FRAME_INTERVAL=0.25 - ;; - spinner3) - FRAME=("◐" "◓" "◑" "◒") - FRAME_INTERVAL=0.5 - ;; - spinner4) - FRAME=(":(" ":|" ":)" ":D") - FRAME_INTERVAL=0.5 - ;; - spinner5) - FRAME=("◇" "◈" "◆") - FRAME_INTERVAL=0.5 - ;; - spinner6) - FRAME=("⚬" "⚭" "⚮" "⚯") - FRAME_INTERVAL=0.25 - ;; - spinner7) - FRAME=("░" "▒" "▓" "█" "▓" "▒") - FRAME_INTERVAL=0.25 - ;; - spinner8) - FRAME=("☉" "◎" "◉" "●" "◉") - FRAME_INTERVAL=0.1 - ;; - spinner9) - FRAME=("❤" "♥" "♡") - FRAME_INTERVAL=0.15 - ;; - spinner10) - FRAME=("✧" "☆" "★" "✪" "◌" "✲") - FRAME_INTERVAL=0.1 - ;; - spinner11) - FRAME=("●" "◕" "☯" "◔" "◕") - FRAME_INTERVAL=0.25 - ;; - *) - echo "No spinner is defined for $1" - exit 1 - ;; - esac -} - -function is_gpu_arch { - local ar=$1 - if [[ $ar == "VOLTA"* ]] || [[ $ar == "TURING"* ]] || [[ $ar == "AMPERE"* ]] || [[ $ar == "MAXWELL"* ]] || [[ $ar == "PASCAL"* ]] || [[ $ar == "KEPLER"* ]] || [[ $ar == "INTEL"* ]] || [[ $ar == "VEGA"* ]] || [[ $ar == "NAVI"* ]]; then - echo "TRUE" - else - echo "FALSE" - fi -} - -GRAY='\033[0;30m' -GREEN='\033[0;32m' -RED='\033[0;31m' -BLUE='\033[0;34m' -NC='\033[0m' diff --git a/legacy/deploy/aux/config.sh b/legacy/deploy/aux/config.sh deleted file mode 100644 index a2324f286..000000000 --- 
a/legacy/deploy/aux/config.sh +++ /dev/null @@ -1,73 +0,0 @@ -declare -x writing_modulefile="ON" -declare -x use_modules="OFF" -declare -x enable_cuda="OFF" -declare -x install_path=${install_prefix}/${modulename_lower} - -if [ -z $has_modulefile ]; then - writing_modulefile="OFF" -else - modnm=$(eval echo "\${${modulename_lower}_module}") - if [[ $has_modulefile = "OFF" || $modnm = "OFF" ]]; then - writing_modulefile="OFF" - fi -fi - -if [ ! $with_cuda = "OFF" ]; then - enable_cuda="ON" - if [[ $with_cuda == module:* ]]; then - cuda_module=$(echo $with_cuda | cut -d':' -f2) - use_modules="ON" - else - cuda_path=$with_cuda - fi -fi - -if [[ $with_cc == module:* ]]; then - cc_module=$(echo $with_cc | cut -d':' -f2) - use_modules="ON" -fi - -if [ ! $with_mpi = "OFF" ]; then - if [[ $with_mpi == module:* ]]; then - mpi_module=$(echo $with_mpi | cut -d':' -f2) - use_modules="ON" - if [ ! $with_cuda = "OFF" ]; then - mpi_module=$mpi_module/cuda - else - mpi_module=$mpi_module/cpu - fi - else - mpi_path=$with_mpi - fi -fi - -function define_kokkos_suffix { - local arch_raw=$1 - local suffix="" - declare -xa archs - IFS=',' read -ra archs <<<"$arch_raw" - - for ar in "${archs[@]}"; do - if [ $ar = "AUTO" ]; then - break - fi - is_gpu=$(is_gpu_arch $ar) - if [ $is_gpu = "TRUE" ]; then - if [ $enable_cuda != "ON" ]; then - printf "\n${RED}GPU architecture $ar is specified but CUDA is not enabled${NC}\n" - exit 1 - fi - suffix+="/${ar,,}" - fi - done - for ar in "${archs[@]}"; do - if [ $ar = "AUTO" ]; then - break - fi - is_gpu=$(is_gpu_arch $ar) - if [ $is_gpu != "TRUE" ]; then - suffix+="/${ar,,}" - fi - done - echo $suffix -} diff --git a/legacy/deploy/aux/default.sh b/legacy/deploy/aux/default.sh deleted file mode 100644 index 186595b7d..000000000 --- a/legacy/deploy/aux/default.sh +++ /dev/null @@ -1,21 +0,0 @@ -default_with_cuda="module:cudatoolkit/12.0" -declare with_cuda="${default_with_cuda}" - -default_with_cc="module:gcc-toolset/10" -declare 
with_cc="${default_with_cc}" - -default_install_prefix="${HOME}/opt" -declare install_prefix="${default_install_prefix}" - -default_module_path="${HOME}/opt/.modules" -default_src_path="${HOME}/opt/src" - -declare -r modulename_lower=${modulename,,} - -declare ${modulename_lower}_module="${default_module_path}/${modulename_lower}" -declare ${modulename_lower}_src_path="${default_src_path}/${modulename_lower}" - -declare deploy="OFF" -declare verbose="OFF" - -declare -r programname=$0 \ No newline at end of file diff --git a/legacy/deploy/aux/globals.sh b/legacy/deploy/aux/globals.sh deleted file mode 100644 index b67fb917a..000000000 --- a/legacy/deploy/aux/globals.sh +++ /dev/null @@ -1,39 +0,0 @@ -declare -r logfile="$(pwd)/${modulename_lower}.log" - -function common_help { - echo "" - echo "Build and install $modulename" - echo "" - echo "usage: bash $programname --deploy -v --install_prefix ... [options]" - echo "" - echo " -h, --help print this help message" - echo "" - echo " -d, --deploy execute the script" - echo " -d OFF shows the command to be executed" - echo " (default: $deploy)" - echo "" - echo " -v, --verbose whether to print the compilation progress or not" - echo " (default: $verbose)" - echo "" - echo " --install_prefix path to $modulename installation directory" - echo " (default: $default_install_prefix)" - echo "" - printf "%-33s%s" " --${modulename_lower}_src_path " "path to ${modulename} source directory" - echo "" - echo " (default: $default_src_path/${modulename_lower})" - if [ ! 
-z $has_modulefile ]; then - echo "" - printf "%-33s%s" " --${modulename_lower}_module " "path to ${modulename} module" - echo "" - echo " set to OFF to disable modulefile installation" - echo " (default: $default_module_path/${modulename_lower})" - fi - echo "" - echo " --with-cc C compiler path or modulename (\`module:\`)" - echo " (default: $default_with_cc)" - echo "" - echo " --with-cuda CUDA path or modulename via (\`module:\`)" - echo " set to OFF to disable CUDA support" - echo " (default: $default_with_cuda)" - echo "" -} diff --git a/legacy/deploy/aux/run.sh b/legacy/deploy/aux/run.sh deleted file mode 100644 index 9322586fb..000000000 --- a/legacy/deploy/aux/run.sh +++ /dev/null @@ -1,172 +0,0 @@ -declare -x REPORT_VARS=() -declare -x REPORT_VALS=() - -function common_report { - # Report cuda - if [ $enable_cuda = "ON" ]; then - if [ $use_modules = "ON" ]; then - REPORT_VALS=( - "module:${cuda_module}" - "${REPORT_VALS[@]}" - ) - else - REPORT_VALS=( - "${with_cuda}" - "${REPORT_VALS[@]}" - ) - fi - REPORT_VARS=( - "CUDA" - "${REPORT_VARS[@]}" - ) - fi - - # Report C compiler - REPORT_VARS=( - "C compiler" - "${REPORT_VARS[@]}" - ) - if [ $use_modules = "ON" ]; then - REPORT_VALS=( - "module:${cc_module}" - "${REPORT_VALS[@]}" - ) - else - local cc=$(which gcc) - REPORT_VALS=( - "${cc}" - "${REPORT_VALS[@]}" - ) - fi - - # Report modulefile path - if [ $writing_modulefile != "OFF" ]; then - local module_dir=${modulename_lower}_module - REPORT_VARS=( - "modulefile path" - "${REPORT_VARS[@]}" - ) - REPORT_VALS=( - "${!module_dir}" - "${REPORT_VALS[@]}" - ) - fi - - # Report src & install directory - local src_path=${modulename_lower}_src_path - REPORT_VARS=( - "src directory" - "install directory" "${REPORT_VARS[@]}" - ) - REPORT_VALS=( - "${!src_path}" - "${install_path}" "${REPORT_VALS[@]}" - ) - - for i in "${!REPORT_VARS[@]}"; do - printf " %-25s%s\n" "${REPORT_VARS[i]}:" "${REPORT_VALS[i]}" - done - echo "" -} - -run() { - local configure="$1" - local 
build="$2" - local install="$3" - local cleanup="$4" - if [ $writing_modulefile != "OFF" ]; then - if [[ -z $5 ]]; then - printf "\n${RED}Missing modulefile function${NC}\n" - exit 1 - else - modulefile="$5" - fi - if [[ -z $6 ]]; then - printf "\n${RED}Missing report function${NC}\n" - exit 1 - else - report="$6" - fi - else - if [[ -z $5 ]]; then - report="" - else - report="$5" - fi - fi - - printf "Installing ${BLUE}${modulename}${NC}\n" - eval $report - common_report - - set_spinner spinner1 - declare -x STEPS=( - 'configure' - 'build' - 'install' - 'cleanup' - ) - declare -x CMDS=( - 'eval $configure' - 'eval $build' - 'eval $install' - 'eval $cleanup' - ) - if [ $writing_modulefile != "OFF" ]; then - STEPS+=( - 'modulefile' - ) - CMDS+=( - 'eval $modulefile' - ) - fi - - local step=0 - rm -f $logfile - - tput civis -- invisible - - while [ "$step" -lt "${#CMDS[@]}" ]; do - ${CMDS[$step]} & - pid=$! - - if [ $verbose = "OFF" ]; then - while ps -p $pid &>/dev/null; do - echo -ne "\\r[ ] ${STEPS[$step]}" - - for k in "${!FRAME[@]}"; do - echo -ne "\\r[ ${FRAME[k]} ]" - sleep $FRAME_INTERVAL - done - done - fi - wait $pid - local exitcode=$? 
- - if [ $exitcode -eq 0 ]; then - if [ $deploy = "OFF" ]; then - # draw up arrow - echo -ne "\\r[ ${BLUE}↑${NC} ] ${STEPS[$step]}\\n" - echo "" - else - echo -ne "\\r[ ${GREEN}✔${NC} ] ${STEPS[$step]}\\n" - fi - else - echo -ne "\\r[ ${RED}✘${NC} ] ${STEPS[$step]}\\n" - echo "Failed to install ${modulename} :(" - echo "see ${logfile} for more details" - exit 1 - fi - step=$((step + 1)) - done - - tput cnorm -- normal - - if [ $deploy = "ON" ]; then - echo - printf "${BLUE}${modulename}${NC} succesfully installed in ${install_prefix}/${modulename_lower}!\n" - else - echo - printf "now run \`${BLUE}bash ${programname} -d${NC}\` to execute the script\n" - fi -} diff --git a/legacy/deploy/compile_adios2.sh b/legacy/deploy/compile_adios2.sh deleted file mode 100644 index c27620820..000000000 --- a/legacy/deploy/compile_adios2.sh +++ /dev/null @@ -1,268 +0,0 @@ -#!/bin/bash - -SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) - -source ${SCRIPT_DIR}/aux/aux.sh - -declare -r modulename="ADIOS2" -declare -r has_modulefile="ON" - -default_debug="OFF" -declare debug="${default_debug}" -default_arch="AUTO" -declare arch="${default_arch}" -default_mpi_path="module:ompi" -declare with_mpi="${default_mpi_path}" -default_hdf5_path="module:hdf5" -declare hdf5="${default_hdf5_path}" -default_kokkos_path="module:kokkos" -declare kokkos="${default_kokkos_path}" - -source ${SCRIPT_DIR}/aux/default.sh -source ${SCRIPT_DIR}/aux/globals.sh - -function usage { - common_help - echo " --with-mpi MPI path or modulename (\`module:\`)" - echo " set to OFF to disable MPI support" - echo " (default: ${default_mpi_path})" - echo "" - echo " --arch Hardware architecture for Kokkos" - echo " comma-separated list of CPU and/or GPU archs" - echo " for example: \`SKX,Volta70\`" - echo " (default: ${default_arch})" - echo "" - echo " --kokkos Kokkos path or modulename (\`module:\`)" - echo " default: ${default_kokkos_path}/" - echo "" - echo " --hdf5 HDF5 path or modulename 
(\`module:\`)" - echo " default: ${default_hdf5_path}/" - echo "" - echo " --debug Build in debug mode" - echo " (default: $debug)" - echo "" -} - -source ${SCRIPT_DIR}/aux/argparse.sh -source ${SCRIPT_DIR}/aux/config.sh - -if [[ $hdf5 = module:* ]]; then - hdf5_module=${hdf5#module:} -fi - -if [[ $kokkos = module:* ]]; then - kokkos_module=${kokkos#module:} -fi - -if [ $arch = "AUTO" ]; then - printf "${RED}Automatic architecture detection is not supported for ADIOS2${NC}\n" - exit 1 -fi - -if [ $debug = "ON" ]; then - adios2_module+="/debug" - install_path+="/debug" - kokkos_module+="/debug" -fi - -if [ $with_mpi != "OFF" ]; then - adios2_module+="/mpi" - install_path+="/mpi" - if [ $hdf5 == $default_hdf5_path ]; then - hdf5_module+="/mpi" - if [ $enable_cuda = "ON" ]; then - hdf5_module+="/cuda" - else - hdf5_module+="/cpu" - fi - fi - kokkos_module+="/mpi" -else - if [ $hdf5 == $default_hdf5_path ]; then - hdf5_module+="/serial" - fi -fi - -if [ $enable_cuda = "ON" ]; then - adios2_module+="/cuda" - install_path+="/cuda" - kokkos_module+="/cuda" -fi - -suffix=$(define_kokkos_suffix $arch) -adios2_module+=$suffix -install_path+=$suffix -kokkos_module+=$suffix - -flags=() - -if [ $enable_cuda = "ON" ]; then - flags+=( - ADIOS2_USE_CUDA=ON - ) -fi - -if [ $with_mpi != "OFF" ]; then - flags+=( - ADIOS2_USE_MPI=ON - ) -else - flags+=( - ADIOS2_USE_MPI=OFF - ADIOS2_HAVE_HDF5_VOL=OFF - ) -fi - -compile_args=( - -D CMAKE_CXX_STANDARD=17 - -D CMAKE_CXX_EXTENSIONS=OFF - -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE - -D BUILD_SHARED_LIBS=ON - - -D ADIOS2_USE_HDF5=ON - -D ADIOS2_USE_Kokkos=ON - - -D ADIOS2_USE_Python=OFF - -D ADIOS2_USE_Fortran=OFF - -D ADIOS2_USE_ZeroMQ=OFF - -D BUILD_TESTING=OFF - -D ADIOS2_BUILD_EXAMPLES=OFF - - -D CMAKE_INSTALL_PREFIX=$install_path -) - -if [ $debug = "ON" ]; then - compile_args+=( - -D CMAKE_BUILD_TYPE=Debug - ) -fi -for flag in "${flags[@]}"; do - compile_args+=( - -D $flag - ) -done - -source ${SCRIPT_DIR}/aux/run.sh - -function prebuild 
{ - if [ $use_modules = "ON" ]; then - runcommand "module purge" - runcommand "module load $cc_module" - if [ $enable_cuda = "ON" ]; then - runcommand "module load $cuda_module" - fi - if [ ! $with_mpi = "OFF" ]; then - runcommand "module load $mpi_module" - fi - runcommand "module load $hdf5_module" - runcommand "module load $kokkos_module" - fi -} - -function configure { - prebuild - runcommand "cd $adios2_src_path" - runcommand "rm -rf build" - local args=$(printf " %s" "${compile_args[@]}") - runcommand "cmake -B build$args" -} - -function compile { - prebuild - runcommand "cd $adios2_src_path" - runcommand "cmake --build build -j" -} - -function install { - runcommand "cd $adios2_src_path" - runcommand "cmake --install build" -} - -function cleanup { - runcommand "cd $adios2_src_path" - runcommand "rm -rf build" -} - -function report { - if [ ! $with_mpi = "OFF" ]; then - REPORT_VARS+=( - "MPI" - ) - REPORT_VALS+=( - "${with_mpi}" - ) - fi - REPORT_VARS+=( - "Kokkos" - "HDF5" - ) - - REPORT_VALS+=( - "${kokkos}" - "${hdf5}" - ) - REPORT_VARS+=( - "Architecture(s)" - "Debug mode" - ) - REPORT_VALS+=( - "${arch}" - "${debug}" - ) -} - -function modulefile { - fname=$adios2_module - description="ADIOS2" - if [ $with_mpi != "OFF" ]; then - description=$description" @ MPI" - fi - if [ $enable_cuda = "ON" ]; then - description=$description" @ CUDA" - fi - for ar in "${archs[@]}"; do - if [ $ar = "AUTO" ]; then - break - fi - description=$description" @ ${ar}" - done - prereqs="" - if [ $use_modules = "ON" ]; then - prereqs+="prereq\t\t$cc_module" - if [ $enable_cuda = "ON" ]; then - prereqs+=" $cuda_module" - fi - if [ ! 
$with_mpi = "OFF" ]; then - prereqs+=" $mpi_module" - fi - prereqs+=" $hdf5_module $kokkos_module" - fi - local setflags="" - for flag in "${flags[@]}"; do - local setflag=$(echo $flag | sed 's/=/\t\t/') - setflags+="\nsetenv\t$setflag" - done - runcommand "mkdir -p $(dirname $fname)" - runcommand "rm -f $fname" - runcommand "echo \"Writing modulefile to $fname\"" - modulecontent='''#%Module1.0###################################################################### -## -## $description -## -proc ModulesHelp { } { - puts stderr \t\"$description\"\n -} -module-whatis \"$description\" - -conflict adios2 -$prereqs - -set basedir $install_path -append-path PATH \$basedir/bin -setenv adios2_DIR \$basedir -$setflags - ''' - runcommand "echo -e \"$modulecontent\" >>$fname" -} - -run configure compile install cleanup modulefile report diff --git a/legacy/deploy/compile_hdf5.sh b/legacy/deploy/compile_hdf5.sh deleted file mode 100644 index 3d10ff63d..000000000 --- a/legacy/deploy/compile_hdf5.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash - -SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) - -source ${SCRIPT_DIR}/aux/aux.sh - -declare -r modulename="HDF5" -declare -r has_modulefile="ON" - -default_mpi_path="module:ompi" -declare with_mpi="${default_mpi_path}" - -source ${SCRIPT_DIR}/aux/default.sh -source ${SCRIPT_DIR}/aux/globals.sh - -function usage { - common_help - echo " --with-mpi MPI path or modulename (\`module:\`)" - echo " set to OFF to disable MPI support" - echo " (default: ${default_mpi_path})" - echo "" -} - -source ${SCRIPT_DIR}/aux/argparse.sh -source ${SCRIPT_DIR}/aux/config.sh - -if [ ! $with_mpi = "OFF" ]; then - hdf5_module+="/mpi" - install_path+="/mpi" - if [ $enable_cuda = "ON" ]; then - hdf5_module+="/cuda" - install_path+="/cuda" - else - hdf5_module+="/cpu" - install_path+="/cpu" - fi -else - hdf5_module+="/serial" - install_path+="/serial" -fi - -if [ ! 
$with_mpi = "OFF" ]; then - compile_args=( - -S HDF5config.cmake,HPC=sbatch,MPI=true,BUILD_GENERATOR=Unix,INSTALLDIR=$install_path - ) -else - compile_args=( - -S HDF5config.cmake,HPC=sbatch,BUILD_GENERATOR=Unix,INSTALLDIR=$install_path - ) -fi -compile_args+=( - -C Release - -V - -O hdf5.log -) - -source ${SCRIPT_DIR}/aux/run.sh - -function prebuild { - if [ $use_modules = "ON" ]; then - runcommand "module purge" - runcommand "module load $cc_module" - if [ $enable_cuda = "ON" ]; then - runcommand "module load $cuda_module" - fi - if [ ! $with_mpi = "OFF" ]; then - runcommand "module load $mpi_module" - fi - fi -} - -function configure { - : # No configuration needed -} - -function compile { - prebuild - runcommand "cd $hdf5_src_path" - runcommand "rm -rf build" - local args=$(printf " %s" "${compile_args[@]}") - runcommand "ctest$args" -} - -function install { - runcommand "module list" - runcommand "cd $hdf5_src_path/build" - runcommand "make install" - runcommand "cd HDF5_ZLIB-prefix/src/HDF5_ZLIB-build" - runcommand "make install" - runcommand "cd ../../../SZIP-prefix/src/SZIP-build" - runcommand "make install" -} - -function cleanup { - runcommand "cd $hdf5_src_path" - runcommand "rm -rf build" -} - -function report { - if [ ! $with_mpi = "OFF" ]; then - REPORT_VARS+=( - "MPI" - ) - REPORT_VALS+=( - "${with_mpi}" - ) - fi -} - -function modulefile { - fname=$hdf5_module - description="HDF5" - if [ ! $with_mpi = "OFF" ]; then - description=$description" @ MPI" - fi - if [ $enable_cuda = "ON" ]; then - description=$description" @ CUDA" - fi - prereqs="" - if [ $use_modules = "ON" ]; then - prereqs+="prereq\t\t$cc_module" - if [ $enable_cuda = "ON" ]; then - prereqs+=" $cuda_module" - fi - if [ ! 
$with_mpi = "OFF" ]; then - prereqs+=" $mpi_module" - fi - fi - runcommand "mkdir -p $(dirname $fname)" - runcommand "rm -f $fname" - runcommand "echo \"Writing modulefile to $fname\"" - modulecontent='''#%Module1.0###################################################################### -## -## $description -## -proc ModulesHelp { } { - puts stderr \t\"$description\"\n -} -module-whatis \"$description\" - -conflict hdf5 -$prereqs - -set basedir $install_path -prepend-path PATH \$basedir/bin -prepend-path LD_LIBRARY_PATH \$basedir/lib -prepend-path LIBRARY_PATH \$basedir/lib -prepend-path MANPATH \$basedir/man -prepend-path HDF5_ROOT \$basedir -prepend-path HDF5DIR \$basedir -append-path -d { } LDFLAGS -L\$basedir/lib -append-path -d { } INCLUDE -I\$basedir/include -append-path CPATH \$basedir/include -append-path -d { } FFLAGS -I\$basedir/include -append-path -d { } FCFLAGS -I\$basedir/include -append-path -d { } LOCAL_LDFLAGS -L\$basedir/lib -append-path -d { } LOCAL_INCLUDE -I\$basedir/include -append-path -d { } LOCAL_CFLAGS -I\$basedir/include -append-path -d { } LOCAL_FFLAGS -I\$basedir/include -append-path -d { } LOCAL_FCFLAGS -I\$basedir/include -append-path -d { } LOCAL_CXXFLAGS -I\$basedir/include - ''' - runcommand "echo -e \"$modulecontent\" >>$fname" -} - -run configure compile install cleanup modulefile report diff --git a/legacy/deploy/compile_kokkos.sh b/legacy/deploy/compile_kokkos.sh deleted file mode 100644 index 6c7b8c530..000000000 --- a/legacy/deploy/compile_kokkos.sh +++ /dev/null @@ -1,215 +0,0 @@ -#!/bin/bash - -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) - -source ${SCRIPT_DIR}/aux/aux.sh - -declare -r modulename="Kokkos" -declare -r has_modulefile="ON" - -default_debug="OFF" -declare debug="${default_debug}" -default_arch="AUTO" -declare arch="${default_arch}" -default_mpi_path="module:ompi" -declare with_mpi="${default_mpi_path}" - -source ${SCRIPT_DIR}/aux/default.sh -source ${SCRIPT_DIR}/aux/globals.sh 
- -function usage { - common_help - echo " --with-mpi MPI path or modulename (\`module:\`)" - echo " set to OFF to disable MPI support" - echo " (default: ${default_mpi_path})" - echo "" - echo " --arch Hardware architecture for Kokkos" - echo " comma-separated list of CPU and/or GPU archs" - echo " for example: \`SKX,Volta70\`" - echo " (default: ${default_arch})" - echo "" - echo " --debug Build in debug mode" - echo " (default: $debug)" - echo "" -} - -source ${SCRIPT_DIR}/aux/argparse.sh -source ${SCRIPT_DIR}/aux/config.sh - -declare -a archs -IFS=',' read -ra archs <<<"$arch" - -if [ $debug = "ON" ]; then - kokkos_module+="/debug" - install_path+="/debug" -fi - -if [ $with_mpi != "OFF" ]; then - kokkos_module+="/mpi" - install_path+="/mpi" -fi - -if [ $enable_cuda = "ON" ]; then - kokkos_module+="/cuda" - install_path+="/cuda" -fi - -suffix=$(define_kokkos_suffix $arch) -kokkos_module+=$suffix -install_path+=$suffix - -flags=() - -if [ $with_mpi = "OFF" ]; then - flags+=( - Kokkos_ENABLE_OPENMP=ON - ) -fi - -if [ $enable_cuda = "ON" ]; then - flags+=( - Kokkos_ENABLE_CUDA=ON - ) -fi - -for ar in "${archs[@]}"; do - if [ $ar = "AUTO" ]; then - break - fi - flags+=( - Kokkos_ARCH_${ar}=ON - ) -done - -if [ $debug = "ON" ]; then - flags+=( - Kokkos_ENABLE_DEBUG=ON - Kokkos_ENABLE_DEBUG_BOUNDS_CHECK=ON - ) -fi - -compile_args=( - -D CMAKE_CXX_STANDARD=17 - -D CMAKE_CXX_EXTENSIONS=OFF - -D CMAKE_POSITION_INDEPENDENT_CODE=TRUE - -D BUILD_SHARED_LIBS=ON - -D CMAKE_INSTALL_PREFIX=$install_path -) -for flag in "${flags[@]}"; do - compile_args+=( - -D $flag - ) -done - -source ${SCRIPT_DIR}/aux/run.sh - -function prebuild { - if [ $use_modules = "ON" ]; then - runcommand "module purge" - runcommand "module load $cc_module" - if [ $enable_cuda = "ON" ]; then - runcommand "module load $cuda_module" - fi - if [ ! 
$with_mpi = "OFF" ]; then - runcommand "module load $mpi_module" - fi - fi -} - -function configure { - prebuild - runcommand "cd $kokkos_src_path" - runcommand "rm -rf build" - local args=$(printf " %s" "${compile_args[@]}") - runcommand "cmake -B build$args" -} - -function compile { - prebuild - runcommand "cd $kokkos_src_path" - runcommand "cmake --build build -j" -} - -function install { - runcommand "cd $kokkos_src_path" - runcommand "cmake --install build" -} - -function cleanup { - runcommand "cd $kokkos_src_path" - runcommand "rm -rf build" -} - -function report { - if [ ! $with_mpi = "OFF" ]; then - REPORT_VARS+=( - "MPI" - ) - REPORT_VALS+=( - "${with_mpi}" - ) - fi - REPORT_VARS+=( - "Architecture(s)" - "Debug mode" - ) - REPORT_VALS+=( - "${arch}" - "${debug}" - ) -} - -function modulefile { - fname=$kokkos_module - description="Kokkos" - if [ $with_mpi != "OFF" ]; then - description=$description" @ MPI" - fi - if [ $enable_cuda = "ON" ]; then - description=$description" @ CUDA" - fi - for ar in "${archs[@]}"; do - if [ $ar = "AUTO" ]; then - break - fi - description=$description" @ ${ar}" - done - prereqs="" - if [ $use_modules = "ON" ]; then - prereqs+="prereq\t\t$cc_module" - if [ $enable_cuda = "ON" ]; then - prereqs+=" $cuda_module" - fi - if [ ! 
$with_mpi = "OFF" ]; then - prereqs+=" $mpi_module" - fi - fi - local setflags="" - for flag in "${flags[@]}"; do - local setflag=$(echo $flag | sed 's/=/\t\t/') - setflags+="\nsetenv\t$setflag" - done - runcommand "mkdir -p $(dirname $fname)" - runcommand "rm -f $fname" - runcommand "echo \"Writing modulefile to $fname\"" - modulecontent='''#%Module1.0###################################################################### -## -## $description -## -proc ModulesHelp { } { - puts stderr \t\"$description\"\n -} -module-whatis \"$description\" - -conflict kokkos -$prereqs - -set basedir $install_path -append-path PATH \$basedir/bin -setenv Kokkos_DIR \$basedir -$setflags - ''' - runcommand "echo -e \"$modulecontent\" >>$fname" -} - -run configure compile install cleanup modulefile report diff --git a/legacy/deploy/compile_ompi.sh b/legacy/deploy/compile_ompi.sh deleted file mode 100644 index 9a495f78f..000000000 --- a/legacy/deploy/compile_ompi.sh +++ /dev/null @@ -1,161 +0,0 @@ -#!/bin/bash - -SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) - -source ${SCRIPT_DIR}/aux/aux.sh - -declare -r modulename="OMPI" -declare -r has_modulefile="ON" - -default_ucx_path="${HOME}/opt/ucx" -declare with_ucx="${default_ucx_path}" - -source ${SCRIPT_DIR}/aux/default.sh -source ${SCRIPT_DIR}/aux/globals.sh - -function usage { - common_help - echo " --with-ucx enable UCX support (specify installation path)" - echo " set to OFF to disable UCX support" - echo " (default: ${default_ucx_path})" - echo "" -} - -source ${SCRIPT_DIR}/aux/argparse.sh -source ${SCRIPT_DIR}/aux/config.sh - -if [ $enable_cuda = "ON" ]; then - install_path="${install_path}/cuda" - ompi_module="${ompi_module}/cuda" -else - install_path="${install_path}/cpu" - ompi_module="${ompi_module}/cpu" -fi - -compile_args=( - --prefix=${install_path} - --with-devel-headers -) - -if [ $use_modules = "ON" ]; then - if [ $enable_cuda = "ON" ]; then - compile_args+=( - --with-cuda=\$CUDA_HOME - ) - fi 
-else - if [ $enable_cuda = "ON" ]; then - compile_args+=( - --with-cuda=$with_cuda - ) - fi -fi - -if [ ! $with_ucx = "OFF" ]; then - if [ $enable_cuda = "ON" ]; then - compile_args+=( - --with-ucx=$with_ucx/cuda - ) - else - compile_args+=( - --with-ucx=$with_ucx/cpu - ) - fi -fi - -source ${SCRIPT_DIR}/aux/run.sh - -function prebuild { - if [ $use_modules = "ON" ]; then - runcommand "module purge" - runcommand "module load $cc_module" - runcommand "export CC=\$(which gcc) CXX=\$(which g++)" - if [ $enable_cuda = "ON" ]; then - runcommand "module load $cuda_module" - fi - fi -} - -function configure { - prebuild - runcommand "cd $ompi_src_path" - runcommand "rm -rf build" - runcommand "./autogen.pl" - runcommand "mkdir build" - runcommand "cd build" - local args=$(printf " %s" "${compile_args[@]}") - runcommand "../configure$args" -} - -function compile { - prebuild - runcommand "cd $ompi_src_path/build" - runcommand "make -j" -} - -function install { - runcommand "cd $ompi_src_path/build" - runcommand "make install" -} - -function cleanup { - runcommand "cd $ompi_src_path" - runcommand "rm -rf build" -} - -function report { - REPORT_VARS+=( - "UCX" - ) - REPORT_VALS+=( - "${with_ucx}" - ) -} - -function modulefile { - fname=$ompi_module - description="Open MPI" - if [ $enable_cuda = "ON" ]; then - description=$description" @ CUDA" - fi - prereqs="" - if [ $use_modules = "ON" ]; then - prereqs+="prereq\t\t$cc_module" - fi - runcommand "mkdir -p $(dirname $fname)" - runcommand "rm -f $fname" - runcommand "echo \"Writing modulefile to $fname\"" - modulecontent='''#%Module1.0###################################################################### -## -## $description -## -proc ModulesHelp { } { - puts stderr \t\"$description\"\n -} -module-whatis \"$description\" - -conflict ompi openmpi -$prereqs - -set basedir $install_path -prepend-path PATH \$basedir/bin -prepend-path LD_LIBRARY_PATH \$basedir/lib - -append-path -d { } LOCAL_LDFLAGS -L\$basedir/lib -append-path -d 
{ } LOCAL_INCLUDE -I\$basedir/include -append-path -d { } LOCAL_CFLAGS -I\$basedir/include -append-path -d { } LOCAL_FCFLAGS -I\$basedir/include -append-path -d { } LOCAL_CXXFLAGS -I\$basedir/include - -setenv CXX \$basedir/bin/mpicxx -setenv CC \$basedir/bin/mpicc - -setenv SLURM_MPI_TYPE pmix_v3 -setenv MPIHOME \$basedir -setenv MPI_HOME \$basedir -setenv OPENMPI_HOME \$basedir - ''' - runcommand "echo -e \"$modulecontent\" >>$fname" -} - -run configure compile install cleanup modulefile report diff --git a/legacy/deploy/compile_ucx.sh b/legacy/deploy/compile_ucx.sh deleted file mode 100644 index fcbe4a2eb..000000000 --- a/legacy/deploy/compile_ucx.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/bash - -SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) - -source ${SCRIPT_DIR}/aux/aux.sh - -declare -r modulename="UCX" -source ${SCRIPT_DIR}/aux/default.sh -source ${SCRIPT_DIR}/aux/globals.sh - -function usage { - common_help -} - -source ${SCRIPT_DIR}/aux/argparse.sh -source ${SCRIPT_DIR}/aux/config.sh - -if [ $enable_cuda = "ON" ]; then - install_path="${install_path}/cuda" -else - install_path="${install_path}/cpu" -fi - -compile_args=( - --prefix=$install_path -) - -if [ $use_modules = "ON" ]; then - if [ $enable_cuda = "ON" ]; then - compile_args+=( - --with-cuda=\$CUDA_HOME - ) - fi -else - if [ $enable_cuda = "ON" ]; then - compile_args+=( - --with-cuda=$cuda_path - ) - fi -fi - -source ${SCRIPT_DIR}/aux/run.sh - -function prebuild { - if [ $use_modules = "ON" ]; then - runcommand "module purge" - runcommand "module load $cc_module" - runcommand "export CC=\$(which gcc) CXX=\$(which g++)" - if [ $enable_cuda = "ON" ]; then - runcommand "module load $cuda_module" - fi - fi -} - -function configure { - prebuild - runcommand "cd $ucx_src_path" - runcommand "rm -rf build" - runcommand "./autogen.sh" - runcommand "mkdir build" - runcommand "cd build" - local args=$(printf " %s" "${compile_args[@]}") - runcommand "../configure$args" -} - 
-function compile { - prebuild - runcommand "cd $ucx_src_path/build" - runcommand "make -j" -} - -function install { - runcommand "cd $ucx_src_path/build" - runcommand "make install" -} - -function cleanup { - runcommand "cd $ucx_src_path" - runcommand "rm -rf build" -} - -run configure compile install cleanup diff --git a/legacy/deploy/deploy.py b/legacy/deploy/deploy.py deleted file mode 100644 index 323582f3b..000000000 --- a/legacy/deploy/deploy.py +++ /dev/null @@ -1,282 +0,0 @@ -import argparse -import pathlib -from pip._vendor import tomli -from string import Template -import os -from typing import Final - -gpu_archs = [ - "VOLTA", - "TURING", - "AMPERE", - "MAXWELL", - "PASCAL", - "KEPLER", - "INTEL", - "VEGA", - "NAVI", -] - - -class ColoredText(str): - def __new__(cls, text, color): - return super().__new__(cls, text) - - def __init__(self, text, color): - self.color = color - self.colors = { - "red": "\033[0;31m", - "green": "\033[0;32m", - "blue": "\033[0;34m", - "gray": "\033[0;30m", - "nc": "\033[0m", - } - - def __str__(self): - return self.colors[self.color] + self + self.colors["nc"] - - def __repr__(self): - return str(self) - - -# Dependency not implemented error - -dependency_error: Final[str] = ColoredText( - "Dependency deployment not implemented yet. You can run the included .sh scripts manually. 
Run with `bash compile_.sh` -h for more info.", - "red", -) - -arg_parser = argparse.ArgumentParser(description="Deploy Entity modulefiles") - -arg_parser.add_argument( - "-c", - "--config", - help="Path to the specific configuration file", - default="config.toml", - type=pathlib.Path, - required=True, -) -arg_parser.add_argument( - "-d", - "--deploy", - help="Execute the stript", - default=False, - action="store_true", -) -arg_parser.add_argument( - "-v", - "--verbose", - help="Print verbose output", - default=False, - action="store_true", -) -arg_parser.add_argument( - "--depends", - help="Also build, install & deploy the dependencies", - default=False, - action="store_true", -) - -modulefile_template = Template( - """ -#%Module1.0###################################################################### -## -## Entity ${configuration} -## -################################################################################ -proc ModulesHelp { } { - puts stderr "\\tEntity ${configuration}\\n" -} - -module-whatis "Entity ${configuration}" - -conflict entity - -${kokkos_setenvs} -${openmp_setenvs} - -${entity_setenvs} - -${modules} -""" -) - - -def get_suffix(debug=False, mpi=False, cuda=False, archs=[]): - return ( - ("/debug" if debug else "") - + ("/mpi" if mpi else "") - + ("/cuda" if cuda else "") - + "/" - + "/".join( - f"{arch.lower()}" - for arch in sorted( - archs, key=lambda x: not any([any(ar in x for ar in gpu_archs)]) - ) - ) - ) - - -if __name__ == "__main__": - args = arg_parser.parse_args() - configfname = args.config - dependency_build_scripts = [] - with open(configfname, "r", encoding="utf-8") as f: - config = tomli.loads(f.read()) - modulepath = pathlib.Path(os.path.expandvars(config["entity"]["modulepath"])) - instances = config["entity"]["instances"] - dependencies = config["dependencies"] - cc_module = None - if (cc_path := dependencies["cc"]).startswith("module:"): - cc_module = cc_path.split(":")[1] - for debug in instances["debug"]: - for mpi in 
instances["with_mpi"]: - for cuda in instances["with_cuda"]: - for architectures in instances["archs"]: - archs = architectures.split(",") - isgpu = any([any(ar in a for ar in gpu_archs) for a in archs]) - if cuda != isgpu: - continue - - entity_setenvs = [] - kokkos_setenvs = [] - openmp_setenvs = [] - modules = [cc_module] if cc_module else [] - entity_setenvs += [ - ["Entity_ENABLE_DEBUG", "ON" if debug else "OFF"] - ] - entity_setenvs += [ - ["Entity_ENABLE_MPI", "ON" if mpi else "OFF"] - ] - if cuda and (cuda_path := dependencies["cuda"]).startswith( - "module:" - ): - modules += [(cuda_module := cuda_path.split(":")[1])] - for arch in archs: - kokkos_setenvs += [[f"Kokkos_ARCH_{arch}", "ON"]] - - if mpi and (mpi_path := dependencies["mpi"]).startswith( - "module:" - ): - modules += [ - ( - mpi_module := mpi_path.split(":")[1] - + ("/cuda" if cuda else "/cpu") - ) - ] - else: - openmp_setenvs += [ - ["Kokkos_ENABLE_OPENMP", "ON"], - ["OMP_PROC_BIND", "spread"], - ["OMP_PLACES", "threads"], - ["OMP_NUM_THREADS", "[exec nproc]"], - ] - if (hdf5_path := dependencies["hdf5"]).startswith("module:"): - hdf5_path += ( - ("/mpi" if cuda else "/mpi") if mpi else "/serial" - ) - modules += [ - (hdf5_module := hdf5_path.split(":")[1]) - + ("/cuda" if cuda else "/cpu") - if mpi - else "" - ] - kokkos_setenvs += [ - ["Kokkos_ENABLE_CUDA", "ON" if cuda else "OFF"] - ] - - suffix = get_suffix(debug, mpi, cuda, archs) - if (kokkos_path := dependencies["kokkos"]).startswith( - "module:" - ): - modules += [ - ( - kokkos_module := ( - kokkos_path := kokkos_path + suffix - ).split(":")[1] - ) - ] - if (adios2_path := dependencies["adios2"]).startswith( - "module:" - ): - modules += [ - ( - adios2_module := ( - adios2_path := adios2_path + suffix - ).split(":")[1] - ) - ] - configuration = suffix.upper().replace("/", " @ ")[1:] - entity_setenvs = "\n".join( - f"{'setenv':<16}{e[0]:<27}{e[1]}" for e in entity_setenvs - ) - kokkos_setenvs = "\n".join( - 
f"{'setenv':<16}{e[0]:<27}{e[1]}" for e in kokkos_setenvs - ) - openmp_setenvs = "\n".join( - f"{'setenv':<16}{e[0]:<27}{e[1]}" for e in openmp_setenvs - ) - modules = "\n".join( - f"{'module load':<16}" + os.path.expandvars(m) - for m in modules - ) - modulefile = pathlib.Path.joinpath(modulepath, suffix[1:]) - modulefile_content = modulefile_template.substitute( - configuration=configuration, - entity_setenvs=entity_setenvs, - kokkos_setenvs=kokkos_setenvs, - openmp_setenvs=openmp_setenvs, - modules=modules, - ) - if args.deploy: - modulefile.parent.mkdir(parents=True, exist_ok=True) - with open(modulefile, "w") as f: - f.write(modulefile_content.strip()) - print(modulefile) - if args.verbose or not args.deploy: - print( - ColoredText(modulefile_content, "nc"), - sep="\n", - ) - - if args.depends: - dlm = " \\\n " - arch_flag = f"--arch {architectures}" - with_debug = f"--debug ON{dlm}" if debug else "" - with_cc = f"--with-cc {cc_path}{dlm}" - with_cuda = ( - f"--with-cuda {cuda_path}{dlm}" - if cuda - else f"--with-cuda OFF{dlm}" - ) - with_mpi = ( - f"--with-mpi {mpi_path}{dlm}" - if mpi - else f"--with-mpi OFF{dlm}" - ) - with_hdf5 = f"--with-hdf5 {hdf5_path}{dlm}" - with_kokkos = f"--with-kokkos {kokkos_path}{dlm}" - flags_kokkos = "{with_debug}{with_cc}{with_cuda}{with_mpi}{with_hdf5}{arch_flag}".format( - **locals() - ) - flags_adios2 = "{with_debug}{with_cc}{with_cuda}{with_mpi}{with_hdf5}{with_kokkos}{arch_flag}".format( - **locals() - ) - dependency_build_scripts += [ - f"bash compile_kokkos.sh{dlm}{flags_kokkos}" - ] - dependency_build_scripts += [ - f"bash compile_adios2.sh{dlm}{flags_adios2}" - ] - if args.depends: - print( - ColoredText("Use the following commands to build the dependencies:", "blue") - ) - print() - for group in ["kokkos", "adios2"]: - print(ColoredText(f"{group}", "green")) - print("---") - for script in dependency_build_scripts: - if f"compile_{group}" in script: - print(script) - print() diff --git a/legacy/deploy/personal.toml 
b/legacy/deploy/personal.toml deleted file mode 100644 index 3f5632311..000000000 --- a/legacy/deploy/personal.toml +++ /dev/null @@ -1,16 +0,0 @@ -[dependencies] -cuda = "module:cuda/12.0" -cc = "module:gcc/11.4" -mpi = "module:$HOME/opt/.modules/ompi" -hdf5 = "module:$HOME/opt/.modules/hdf5" -kokkos = "module:$HOME/opt/.modules/kokkos" -adios2 = "module:$HOME/opt/.modules/adios2" - -[entity] -modulepath = "$HOME/.modules/entity" - -[entity.instances] -debug = [true, false] -with_cuda = [true, false] -with_mpi = [true, false] -archs = ["ZEN2,AMPERE86", "ZEN2"] diff --git a/legacy/deploy/stellar.toml b/legacy/deploy/stellar.toml deleted file mode 100644 index f4c76d97b..000000000 --- a/legacy/deploy/stellar.toml +++ /dev/null @@ -1,16 +0,0 @@ -[dependencies] -cuda = "module:cudatoolkit/12.0" -cc = "module:gcc-toolset/10" -mpi = "module:$HOME/opt/.modules/ompi" -hdf5 = "module:$HOME/opt/.modules/hdf5" -kokkos = "module:$HOME/opt/.modules/kokkos" -adios2 = "module:$HOME/opt/.modules/adios2" - -[entity] -modulepath = "$HOME/.modules/entity" - -[entity.instances] -debug = [true, false] -with_cuda = [true, false] -with_mpi = [true, false] -archs = ["ZEN2,AMPERE80", "VOLTA70,SKX", "SKX"] diff --git a/legacy/deploy/zaratan.toml b/legacy/deploy/zaratan.toml deleted file mode 100644 index 4094ddf13..000000000 --- a/legacy/deploy/zaratan.toml +++ /dev/null @@ -1,16 +0,0 @@ -[dependencies] -cuda = "module:cuda/11.8.0/gcc/11.3.0/zen2" -cc = "module:gcc/11.3.0" -mpi = "" -hdf5 = "module:hdf5" -kokkos = "module:$HOME/opt/.modules/kokkos" -adios2 = "module:$HOME/opt/.modules/adios2" - -[entity] -modulepath = "$HOME/opt/.modules/entity" - -[entity.instances] -debug = [false] -with_cuda = [true] -with_mpi = [false] -archs = ["ZEN2,AMPERE80"] diff --git a/legacy/src/framework/digital_filters.hpp b/legacy/src/framework/digital_filters.hpp deleted file mode 100644 index 0d9fc776a..000000000 --- a/legacy/src/framework/digital_filters.hpp +++ /dev/null @@ -1,180 +0,0 @@ - -template 
-class DigitalFilter_kernel> : public DigitalFilterBase { - - using DigitalFilterBase::DigitalFilterBase; - using DigitalFilterBase::array; - using DigitalFilterBase::buffer; - using DigitalFilterBase::size; - -public: - Inline void operator()(index_t i, index_t j) const override { - if constexpr (D == Dim::_2D) { - const std::size_t j_min = N_GHOSTS, j_min_p1 = j_min + 1; - const std::size_t j_max = size[1] + N_GHOSTS, j_max_m1 = j_max - 1; - real_t cur_ij, cur_ijp1, cur_ijm1; -#if defined(BELYAEV_FILTER) // Belyaev filter - if (j == j_min) { - /* --------------------------------- r, phi --------------------------------- */ - for (auto& comp : { cur::jx1, cur::jx3 }) { - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, comp, i, j); - cur_ijp1 = FILTER_IN_I1(buffer, comp, i, j + 1); - // ... filter in theta - array(i, j, comp) = INV_2 * cur_ij + INV_4 * cur_ijp1; - } - - /* ---------------------------------- theta --------------------------------- */ - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, cur::jx2, i, j); - cur_ijp1 = FILTER_IN_I1(buffer, cur::jx2, i, j + 1); - // ... filter in theta - array(i, j, cur::jx2) = INV_4 * (cur_ij + cur_ijp1); - } else if (j == j_min_p1) { - /* --------------------------------- r, phi --------------------------------- */ - // ... filter in r - for (auto& comp : { cur::jx1, cur::jx3 }) { - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, comp, i, j); - cur_ijp1 = FILTER_IN_I1(buffer, comp, i, j + 1); - cur_ijm1 = FILTER_IN_I1(buffer, comp, i, j - 1); - // ... filter in theta - array(i, j, comp) = INV_2 * (cur_ij + cur_ijm1) + INV_4 * cur_ijp1; - } - - /* ---------------------------------- theta --------------------------------- */ - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, cur::jx2, i, j); - cur_ijp1 = FILTER_IN_I1(buffer, cur::jx2, i, j + 1); - cur_ijm1 = FILTER_IN_I1(buffer, cur::jx2, i, j - 1); - // ... 
filter in theta - array(i, j, cur::jx2) = INV_2 * cur_ij + INV_4 * (cur_ijm1 + cur_ijp1); - } else if (j == j_max_m1) { - /* --------------------------------- r, phi --------------------------------- */ - // ... filter in r - for (auto& comp : { cur::jx1, cur::jx3 }) { - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, comp, i, j); - cur_ijp1 = FILTER_IN_I1(buffer, comp, i, j + 1); - cur_ijm1 = FILTER_IN_I1(buffer, comp, i, j - 1); - // ... filter in theta - array(i, j, comp) = INV_2 * (cur_ij + cur_ijp1) + INV_4 * cur_ijm1; - } - - /* ---------------------------------- theta --------------------------------- */ - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, cur::jx2, i, j); - cur_ijm1 = FILTER_IN_I1(buffer, cur::jx2, i, j - 1); - // ... filter in theta - array(i, j, cur::jx2) = INV_4 * (cur_ij + cur_ijm1); - } else if (j == j_max) { - /* --------------------------------- r, phi --------------------------------- */ - for (auto& comp : { cur::jx1, cur::jx3 }) { - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, comp, i, j); - cur_ijm1 = FILTER_IN_I1(buffer, comp, i, j - 1); - // ... filter in theta - array(i, j, comp) = INV_2 * cur_ij + INV_4 * cur_ijm1; - } - // no theta component in the last cell - } else { -#else // more conventional filtering - if (j == j_min) { - /* --------------------------------- r, phi --------------------------------- */ - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, cur::jx1, i, j); - cur_ijp1 = FILTER_IN_I1(buffer, cur::jx1, i, j + 1); - // ... filter in theta - array(i, j, cur::jx1) = INV_2 * cur_ij + INV_2 * cur_ijp1; - - array(i, j, cur::jx3) = ZERO; - - /* ---------------------------------- theta --------------------------------- */ - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, cur::jx2, i, j); - cur_ijp1 = FILTER_IN_I1(buffer, cur::jx2, i, j + 1); - // ... 
filter in theta - array(i, j, cur::jx2) = INV_4 * (cur_ij + cur_ijp1); - } else if (j == j_min_p1) { - /* --------------------------------- r, phi --------------------------------- */ - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, cur::jx1, i, j); - cur_ijp1 = FILTER_IN_I1(buffer, cur::jx1, i, j + 1); - cur_ijm1 = FILTER_IN_I1(buffer, cur::jx1, i, j - 1); - // ... filter in theta - array(i, j, cur::jx1) = INV_2 * cur_ij + INV_4 * (cur_ijp1 + cur_ijm1); - - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, cur::jx3, i, j); - cur_ijp1 = FILTER_IN_I1(buffer, cur::jx3, i, j + 1); - // ... filter in theta - array(i, j, cur::jx3) = INV_2 * cur_ij + INV_4 * cur_ijp1; - - /* ---------------------------------- theta --------------------------------- */ - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, cur::jx2, i, j); - cur_ijp1 = FILTER_IN_I1(buffer, cur::jx2, i, j + 1); - cur_ijm1 = FILTER_IN_I1(buffer, cur::jx2, i, j - 1); - // ... filter in theta - array(i, j, cur::jx2) = INV_2 * cur_ij + INV_4 * (cur_ijm1 + cur_ijp1); - } else if (j == j_max_m1) { - /* --------------------------------- r, phi --------------------------------- */ - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, cur::jx1, i, j); - cur_ijp1 = FILTER_IN_I1(buffer, cur::jx1, i, j + 1); - cur_ijm1 = FILTER_IN_I1(buffer, cur::jx1, i, j - 1); - // ... filter in theta - array(i, j, cur::jx1) = INV_2 * cur_ij + INV_4 * (cur_ijm1 + cur_ijp1); - - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, cur::jx3, i, j); - cur_ijm1 = FILTER_IN_I1(buffer, cur::jx3, i, j - 1); - // ... filter in theta - array(i, j, cur::jx3) = INV_2 * cur_ij + INV_4 * cur_ijm1; - - /* ---------------------------------- theta --------------------------------- */ - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, cur::jx2, i, j); - cur_ijm1 = FILTER_IN_I1(buffer, cur::jx2, i, j - 1); - // ... 
filter in theta - array(i, j, cur::jx2) = INV_4 * (cur_ij + cur_ijm1); - } else if (j == j_max) { - /* --------------------------------- r, phi --------------------------------- */ - // ... filter in r - cur_ij = FILTER_IN_I1(buffer, cur::jx1, i, j); - cur_ijm1 = FILTER_IN_I1(buffer, cur::jx1, i, j - 1); - // ... filter in theta - array(i, j, cur::jx1) = INV_2 * cur_ij + INV_2 * cur_ijm1; - - array(i, j, cur::jx3) = ZERO; - } else { -#endif -#pragma unroll - for (auto& comp : { cur::jx1, cur::jx2, cur::jx3 }) { - array(i, j, comp) = INV_4 * buffer(i, j, comp) + - INV_8 * - (buffer(i - 1, j, comp) + buffer(i + 1, j, comp) + - buffer(i, j - 1, comp) + buffer(i, j + 1, comp)) + - INV_16 * (buffer(i - 1, j - 1, comp) + - buffer(i + 1, j + 1, comp) + - buffer(i - 1, j + 1, comp) + - buffer(i + 1, j - 1, comp)); - } - } - } else { // D != Dim::_2D - raise::KernelError( - HERE, - "DigitalFilter_kernel: 2D implementation called for D != 2"); - } - } - - Inline void operator()(index_t, index_t, index_t) const override { - if constexpr (D == Dim::_3D) { - raise::KernelNotImplementedError(HERE); - } else { - raise::KernelError( - HERE, - "DigitalFilter_kernel: 3D implementation called for D != 3"); - } - } -}; \ No newline at end of file diff --git a/legacy/src/framework/io/output_csv.cpp b/legacy/src/framework/io/output_csv.cpp deleted file mode 100644 index 54c127157..000000000 --- a/legacy/src/framework/io/output_csv.cpp +++ /dev/null @@ -1,194 +0,0 @@ -#include "wrapper.h" -#include "output_csv.h" -#include "simulation.h" -#include "meshblock/meshblock.h" -#include "particle_macros.h" - -#include - -#include -#include -#include -#include - -// namespace fs = std::filesystem; - -namespace ntt { - namespace csv { - // void ensureFileExists(const std::string& filename) { - // fs::path file(filename); - // if (!fs::exists(file)) { throw std::runtime_error("File does not exist: " + filename); - // } - // } - - // template - // void writeField(const std::string&, const 
Meshblock&, const em&) { - // // writeField(const std::string& filename, const Meshblock& mblock, const em& - // // field) { rapidcsv::Document doc( - // // "", rapidcsv::LabelParams(-1, -1), rapidcsv::SeparatorParams(',', false, false)); - // // if constexpr (D == Dim1) { - // // auto N = mblock.Ni1(); - // // for (int i {0}; i < N; ++i) { - // // doc.SetCell(i, 0, mblock.em(i + ntt::N_GHOSTS, field)); - // // } - // // } else if constexpr (D == Dim2) { - // // auto N1 = mblock.Ni1(); - // // auto N2 = mblock.Ni2(); - // // for (int i {0}; i < N1; ++i) { - // // for (int j {0}; j < N2; ++j) { - // // doc.SetCell(i, j, mblock.em(i + ntt::N_GHOSTS, j + ntt::N_GHOSTS, - // // field)); - // // } - // // } - // // } else { - // // (void)(field); - // // NTTHostError("Cannot write 3D field data as csv"); - // // } - // // doc.Save(filename); - // } - - // template - // void writeField(const std::string&, const Meshblock&, const cur&) { - // // writeField(const std::string& filename, const Meshblock& mblock, const cur& - // // field) { - // // rapidcsv::Document doc( - // // "", rapidcsv::LabelParams(-1, -1), rapidcsv::SeparatorParams(',', false, - // false)); - // // if constexpr (D == Dim1) { - // // auto N = mblock.Ni1(); - // // for (int i {0}; i < N; ++i) { - // // doc.SetCell(i, 0, mblock.cur(i + ntt::N_GHOSTS, field)); - // // } - // // } else if constexpr (D == Dim2) { - // // auto N1 = mblock.Ni1(); - // // auto N2 = mblock.Ni2(); - // // for (int i {0}; i < N1; ++i) { - // // for (int j {0}; j < N2; ++j) { - // // doc.SetCell(i, j, mblock.cur(i + ntt::N_GHOSTS, j + ntt::N_GHOSTS, - // // field)); - // // } - // // } - // // } else { - // // (void)(field); - // // NTTHostError("Cannot write 3D field data as csv"); - // // } - // // doc.Save(filename); - // } - - // template - // void writeParticle(std::string filename, - // const Meshblock& mblock, - // const std::size_t& species_id, - // const std::size_t& prtl_id, - // const OutputMode& mode) { - // 
std::ofstream outfile; - - // if (mode == OutputMode::APPEND) { - // try { - // ensureFileExists(filename); - // outfile.open(filename, std::ios_base::app); - // } - // catch (const std::exception& e) { - // PLOGI << e.what(); - // PLOGI << "Creating new file instead."; - // outfile.open(filename); - // outfile << "ux1,ux2,ux3,w,x1,x2,x3" << std::endl; - // } - // } - // if (!outfile.is_open()) { - // throw std::runtime_error("Could not open or create file: " + filename); - // } - - // outfile << mblock.particles[species_id].ux1(prtl_id) << ","; - // outfile << mblock.particles[species_id].ux2(prtl_id) << ","; - // outfile << mblock.particles[species_id].ux3(prtl_id) << ","; - // outfile << mblock.particles[species_id].weight(prtl_id); - // if constexpr (D == Dim1 || D == Dim2 || D == Dim3) { - // auto x1 = get_prtl_x1(mblock.particles[species_id], prtl_id); - // outfile << "," << x1; - // } - // if constexpr (D == Dim2 || D == Dim3) { - // auto x2 = get_prtl_x2(mblock.particles[species_id], prtl_id); - // outfile << "," << x2; - // } else if constexpr (D == Dim3) { - // auto x3 = get_prtl_x3(mblock.particles[species_id], prtl_id); - // outfile << "," << x3; - // } - // outfile << "\n"; - - // outfile.close(); - // } - } // namespace csv -} // namespace ntt - -#ifdef PIC_SIMTYPE - -// using Meshblock1D = ntt::Meshblock; -// using Meshblock2D = ntt::Meshblock; -// using Meshblock3D = ntt::Meshblock; - -// template void ntt::csv::writeField(const std::string&, -// const Meshblock1D&, -// const em&); -// template void ntt::csv::writeField(const std::string&, -// const Meshblock2D&, -// const em&); -// template void ntt::csv::writeField(const std::string&, -// const Meshblock3D&, -// const em&); - -// template void ntt::csv::writeField(const std::string&, -// const Meshblock1D&, -// const cur&); -// template void ntt::csv::writeField(const std::string&, -// const Meshblock2D&, -// const cur&); -// template void ntt::csv::writeField(const std::string&, -// const 
Meshblock3D&, -// const cur&); - -// template void ntt::csv::writeParticle( -// std::string, const Meshblock1D&, const std::size_t&, const std::size_t&, const -// OutputMode&); -// template void ntt::csv::writeParticle( -// std::string, const Meshblock2D&, const std::size_t&, const std::size_t&, const -// OutputMode&); -// template void ntt::csv::writeParticle( -// std::string, const Meshblock3D&, const std::size_t&, const std::size_t&, const -// OutputMode&); - -// #elif defined(GRPIC_SIMTYPE) - -// using Meshblock2D = ntt::Meshblock; -// using Meshblock3D = ntt::Meshblock; - -// template void ntt::csv::writeField(const -// std::string&, -// const -// Meshblock2D&, -// const em&); -// template void ntt::csv::writeField(const -// std::string&, -// const -// Meshblock3D&, -// const em&); - -// template void ntt::csv::writeField(const -// std::string&, -// const -// Meshblock2D&, -// const cur&); -// template void ntt::csv::writeField(const -// std::string&, -// const -// Meshblock3D&, -// const cur&); - -// template void ntt::csv::writeParticle( -// std::string, const Meshblock2D&, const std::size_t&, const std::size_t&, const -// OutputMode&); -// template void ntt::csv::writeParticle( -// std::string, const Meshblock3D&, const std::size_t&, const std::size_t&, const -// OutputMode&); - -#endif \ No newline at end of file diff --git a/legacy/src/framework/io/output_csv.h b/legacy/src/framework/io/output_csv.h deleted file mode 100644 index 7217bfdce..000000000 --- a/legacy/src/framework/io/output_csv.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef IO_OUTPUT_CSV_H -#define IO_OUTPUT_CSV_H - -#include "wrapper.h" -#include "io/output.h" -#include "meshblock/meshblock.h" - -namespace ntt { - // enum class OutputMode { UNDEFINED, WRITE, APPEND }; - - namespace csv { - // /** - // * @brief Write a field component to a csv file. - // * @param[in] fname Filename to write to. - // * @param[in] mblock Meshblock. - // * @param[in] em Field component to output. 
- // */ - // template - // void writeField(const std::string&, const Meshblock&, const em&); - - // /** - // * @brief Write a current component to a csv file. - // * @overload - // * @param[in] fname Filename to write to. - // * @param[in] mblock Meshblock. - // * @param[in] cur Current component to output. - // */ - // template - // void writeField(const std::string&, const Meshblock&, const cur&); - - // /** - // * @brief Write a particle data to a csv file. - // * @param[in] fname Filename to write to. - // * @param[in] mblock Meshblock. - // * @param[in] spec_id Species id. - // * @param[in] prtl_id Particle id. - // * @param[in] mode Write mode {WRITE, APPEND}. - // */ - // template - // void writeParticle(std::string, - // const Meshblock&, - // const std::size_t&, - // const std::size_t&, - // const OutputMode& mode = OutputMode::WRITE); - - // /** - // * @brief Ensure the file exists (raises an error if not). - // */ - // void ensureFileExists(const std::string&); - } // namespace csv -} // namespace ntt - -#endif \ No newline at end of file diff --git a/legacy/src/framework/ks_phys_units.h b/legacy/src/framework/ks_phys_units.h deleted file mode 100644 index f7a1b67a3..000000000 --- a/legacy/src/framework/ks_phys_units.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef FRAMEWORK_METRICS_KS_PHYS_UNITS_H -#define FRAMEWORK_METRICS_KS_PHYS_UNITS_H - -#ifdef __INTELLISENSE__ -# pragma diag_suppress 77 -# pragma diag_suppress 65 -#endif - -/** - * Compute metric component 11 in physical coordinate basis. - * - * @param x coordinate array in code units (size of the array is D). - * @returns h_11 (covariant, lower index) metric component. - */ -Inline auto h_11_phys(const coord_t& x) const -> real_t { - real_t r { x[0] * dr + this->x1_min }; - real_t theta { x[1] * dtheta }; - real_t cth { math::cos(theta) }; - return (ONE + TWO * r / (SQR(r) + a_sqr * SQR(cth))); -} - -/** - * Compute metric component 22 in physical coordinate basis. 
- * - * @param x coordinate array in code units (size of the array is D). - * @returns h_22 (covariant, lower index) metric component. - */ -Inline auto h_22_phys(const coord_t& x) const -> real_t { - real_t r { x[0] * dr + this->x1_min }; - real_t theta { x[1] * dtheta }; - real_t cth { math::cos(theta) }; - return (SQR(r) + a_sqr * SQR(cth)); -} - -/** - * Compute metric component 33 in physical coordinate basis. - * - * @param x coordinate array in code units (size of the array is D). - * @returns h_33 (covariant, lower index) metric component. - */ -Inline auto h_33_phys(const coord_t& x) const -> real_t { - real_t r { x[0] * dr + this->x1_min }; - real_t theta { x[1] * dtheta }; - real_t cth { math::cos(theta) }; - real_t sth { math::sin(theta) }; - - real_t delta { SQR(r) - TWO * r + a_sqr }; - real_t As { (SQR(r) + a_sqr) * (SQR(r) + a_sqr) - a_sqr * delta * SQR(sth) }; - return As * SQR(sth) / (SQR(r) + a_sqr * SQR(cth)); -} - -/** - * Compute metric component 13 in physical coordinate basis. - * - * @param x coordinate array in code units (size of the array is D). - * @returns h_13 (covariant, lower index) metric component. - */ -Inline auto h_13_phys(const coord_t& x) const -> real_t { - real_t r { x[0] * dr + this->x1_min }; - real_t theta { x[1] * dtheta }; - real_t sth { math::sin(theta) }; - return -a * SQR(sth) * (ONE + TWO * r / (SQR(r) + a_sqr * SQR(cth))); -} - -/** - * Compute inverse metric component 11 from h_ij in physical coordinate basis. - * - * @param x coordinate array in code units (size of the array is D). - * @returns h^11 (contravariant, upper index) metric component. - */ -Inline auto h11_phys(const coord_t& x) const -> real_t { - return h_33_phys(x) / (h_11_phys(x) * h_33_phys(x) - SQR(h_13_phys(x))); -} - -/** - * Compute inverse metric component 22 from h_ij in physical coordinate basis. - * - * @param x coordinate array in code units (size of the array is D). - * @returns h^22 (contravariant, upper index) metric component. 
- */ -Inline auto h22_phys(const coord_t& x) const -> real_t { - return ONE / h_22_phys(x); -} - -/** - * Compute inverse metric component 33 from h_ij in physical coordinate basis. - * - * @param x coordinate array in code units (size of the array is D). - * @returns h^33 (contravariant, upper index) metric component. - */ -Inline auto h33_phys(const coord_t& x) const -> real_t { - return h_11_phys(x) / (h_11_phys(x) * h_33_phys(x) - SQR(h_13_phys(x))); -} - -/** - * Compute inverse metric component 13 from h_ij in physical coordinate basis. - * - * @param x coordinate array in code units (size of the array is D). - * @returns h^13 (contravariant, upper index) metric component. - */ -Inline auto h13_phys(const coord_t& x) const -> real_t { - return -h_13_phys(x) / (h_11_phys(x) * h_33_phys(x) - SQR(h_13_phys(x))); -} - -#ifdef __INTELLISENSE__ -# pragma diag_default 65 -# pragma diag_default 77 -#endif - -#endif // FRAMEWORK_METRICS_KS_PHYS_UNITS_H \ No newline at end of file diff --git a/legacy/src/framework/metrics/kerr_schild_nomass.h b/legacy/src/framework/metrics/kerr_schild_nomass.h deleted file mode 100644 index f706befe0..000000000 --- a/legacy/src/framework/metrics/kerr_schild_nomass.h +++ /dev/null @@ -1,335 +0,0 @@ -#ifndef FRAMEWORK_METRICS_KERR_SCHILD_H -#define FRAMEWORK_METRICS_KERR_SCHILD_H - -#include "wrapper.h" - -#include "metric_base.h" - -#include -#include - -namespace ntt { - /** - * Kerr metric in Kerr-Schild coordinates - * Units: c = rg = 1 - * - * @tparam D dimension. 
- */ - template - class Metric : public MetricBase { - private: - const real_t dr, dtheta, dphi; - const real_t dr_inv, dtheta_inv, dphi_inv; - const real_t dr_sqr, dtheta_sqr, dphi_sqr; - // Spin parameter, in [0,1[ - // and horizon size in units of rg - // all physical extents are in units of rg - const real_t rh, a, a_sqr; - - public: - const real_t dx_min; - - Metric(std::vector resolution, - std::vector extent, - const real_t* params) - : MetricBase { "kerr_schild", resolution, extent }, - rh { params[5] }, - a { params[4] }, - a_sqr { SQR(a) }, - dr { (this->x1_max - this->x1_min) / this->nx1 }, - dtheta { (real_t)(constant::PI) / this->nx2 }, - dphi { (real_t)(constant::TWO_PI) / this->nx3 }, - dr_inv { ONE / dr }, - dtheta_inv { ONE / dtheta }, - dphi_inv { ONE / dphi }, - dr_sqr { SQR(dr) }, - dtheta_sqr { SQR(dtheta) }, - dphi_sqr { SQR(dphi) }, - dx_min { findSmallestCell() } {} - ~Metric() = default; - - [[nodiscard]] auto spin() const -> const real_t& { - return a; - } - - [[nodiscard]] auto rhorizon() const -> const real_t& { - return rh; - } - - Inline auto h_11(const coord_t& x) const -> real_t { - return dr_sqr; - } - Inline auto h_22(const coord_t& x) const -> real_t { - const real_t r { x[0] * dr + this->x1_min }; - return dtheta_sqr * SQR(r); - } - Inline auto h_33(const coord_t& x) const -> real_t { - const real_t r { x[0] * dr + this->x1_min }; - const real_t theta { x[1] * dtheta }; - if constexpr (D == Dim2) { - return SQR(r * math::sin(theta)); - } else { - return dphi_sqr * SQR(r * math::sin(theta)); - } - } - Inline auto h_13(const coord_t& x) const -> real_t { - return ZERO; - } - Inline auto h11(const coord_t& x) const -> real_t { - return SQR(dr_inv); - } - Inline auto h22(const coord_t& x) const -> real_t { - const real_t r { x[0] * dr + this->x1_min }; - return SQR(dtheta_inv / r); - } - Inline auto h33(const coord_t& x) const -> real_t { - const real_t r { x[0] * dr + this->x1_min }; - const real_t theta { x[1] * dtheta }; - if 
constexpr (D == Dim2) { - return ONE / (SQR(r * math::sin(theta))); - } else { - return SQR(dphi_inv / (r * math::sin(theta))); - } - } - Inline auto h13(const coord_t& x) const -> real_t { - return ZERO; - } - Inline auto alpha(const coord_t& x) const -> real_t { - return ONE; - } - Inline auto beta1(const coord_t& x) const -> real_t { - return ZERO; - } - Inline auto sqrt_det_h(const coord_t& x) const -> real_t { - const real_t r { x[0] * dr + this->x1_min }; - const real_t theta { x[1] * dtheta }; - // ?ASK is this correct? - if constexpr (D == Dim2) { - return dr * dtheta * SQR(r) * math::sin(theta); - } else { - return dr * dtheta * dphi * SQR(r) * math::sin(theta); - } - } - Inline auto sqrt_det_h_tilde(const coord_t& x) const -> real_t { - const real_t r { x[0] * dr + this->x1_min }; - const real_t theta { x[1] * dtheta }; - // ?ASK is this correct? - if constexpr (D == Dim2) { - return dr * dtheta * SQR(r); - } else { - return dr * dtheta * dphi * SQR(r); - } - } - - /** - * Compute the fiducial minimum cell volume. - * - * @returns Minimum cell volume of the grid [code units]. - */ - Inline auto min_cell_volume() const -> real_t { - return math::pow(dx_min * math::sqrt(static_cast(D)), static_cast(D)); - } - - /** - * Compute the area at the pole (used in axisymmetric solvers). - * Approximate solution for the polar area. - * - * @param x coordinate array in code units - * @returns Area at the pole. - */ - Inline auto polar_area(const coord_t& x) const -> real_t { - real_t r { x[0] * dr + this->x1_min }; - real_t del_theta { x[1] * dtheta }; - return dr * SQR(r) * (ONE - math::cos(del_theta)); - } -/** - * @note Since kokkos disallows virtual inheritance, we have to - * include vector transformations for a non-diagonal metric here - * (and not in the base class). - */ -#include "metrics_utils/ks_common.h" -#include "metrics_utils/sph_common.h" - - /** - * Compute minimum effective cell size for a given metric (in physical units). 
- * @returns Minimum cell size of the grid [physical units]. - */ - auto findSmallestCell() const -> real_t { - if constexpr (D == Dim2) { - real_t min_dx { -ONE }; - for (int i { 0 }; i < this->nx1; ++i) { - for (int j { 0 }; j < this->nx2; ++j) { - real_t i_ { static_cast(i) + HALF }; - real_t j_ { static_cast(j) + HALF }; - coord_t ij { i_, j_ }; - real_t dx = ONE - / (this->alpha(ij) * std::sqrt(this->h11(ij) + this->h22(ij)) - + this->beta1(ij)); - if ((min_dx > dx) || (min_dx < 0.0)) { - min_dx = dx; - } - } - } - return min_dx; - } else { - NTTHostError("min cell finding not implemented for 3D"); - return ZERO; - } - } - - /** - * Coordinate conversion from code units to Cartesian physical units. - * - * @param xi coordinate array in code units - * @param x coordinate array in Cartesian physical units - */ - Inline void x_Code2Cart(const coord_t& xi, coord_t& x) const { - if constexpr (D == Dim2) { - coord_t x_sph; - x_Code2Sph(xi, x_sph); - x[0] = x_sph[0] * math::sin(x_sph[1]); - x[1] = x_sph[0] * math::cos(x_sph[1]); - } else if constexpr (D == Dim3) { - coord_t x_sph; - x_Code2Sph(xi, x_sph); - x[0] = x_sph[0] * math::sin(x_sph[1]) * math::cos(x_sph[2]); - x[1] = x_sph[0] * math::sin(x_sph[1]) * math::sin(x_sph[2]); - x[2] = x_sph[0] * math::cos(x_sph[1]); - } - } - - /** - * Coordinate conversion from Cartesian physical units to code units. 
- * - * @param x coordinate array in Cartesian coordinates in physical units - * @param xi coordinate array in code units - */ - Inline void x_Cart2Code(const coord_t& x, coord_t& xi) const { - if constexpr (D == Dim2) { - coord_t x_sph; - x_sph[0] = math::sqrt(x[0] * x[0] + x[1] * x[1]); - x_sph[1] = math::atan2(x[1], x[0]); - x_Sph2Code(x_sph, xi); - } else if constexpr (D == Dim3) { - coord_t x_sph; - x_sph[0] = math::sqrt(x[0] * x[0] + x[1] * x[1] + x[2] * x[2]); - x_sph[1] = math::atan2(x[1], x[0]); - x_sph[2] = math::acos(x[2] / x_sph[0]); - x_Sph2Code(x_sph, xi); - } - } - - /** - * Coordinate conversion from code units to Spherical physical units. - * - * @param xi coordinate array in code units - * @param x coordinate array in Spherical coordinates in physical units - */ - Inline void x_Code2Sph(const coord_t& xi, coord_t& x) const { - if constexpr (D == Dim2) { - x[0] = xi[0] * dr + this->x1_min; - x[1] = xi[1] * dtheta; - } else if constexpr (D == Dim3) { - x[0] = xi[0] * dr + this->x1_min; - x[1] = xi[1] * dtheta; - x[2] = xi[2] * dphi; - } - } - - /** - * Coordinate conversion from Spherical physical units to code units. - * - * @param x coordinate array in Spherical coordinates in physical units - * @param xi coordinate array in code units - */ - Inline void x_Sph2Code(const coord_t& x, coord_t& xi) const { - if constexpr (D == Dim2) { - xi[0] = (x[0] - this->x1_min) * dr_inv; - xi[1] = x[1] * dtheta_inv; - } else if constexpr (D == Dim3) { - xi[0] = (x[0] - this->x1_min) * dr_inv; - xi[1] = x[1] * dtheta_inv; - xi[2] = x[2] * dphi_inv; - } - } - - /** - * Vector conversion from contravariant to spherical contravariant. 
- * - * @param xi coordinate array in code units - * @param vi_cntrv vector in contravariant basis - * @param vsph_cntrv vector in spherical contravariant basis - */ - Inline void v3_Cntrv2PhysCntrv(const coord_t&, - const vec_t& vi_cntrv, - vec_t& vsph_cntrv) const { - vsph_cntrv[0] = vi_cntrv[0] * dr; - vsph_cntrv[1] = vi_cntrv[1] * dtheta; - if constexpr (D == Dim2) { - vsph_cntrv[2] = vi_cntrv[2]; - } else { - vsph_cntrv[2] = vi_cntrv[2] * dphi; - } - } - - /** - * Vector conversion from spherical contravariant to contravariant. - * - * @param xi coordinate array in code units - * @param vsph_cntrv vector in spherical contravariant basis - * @param vi_cntrv vector in contravariant basis - */ - Inline void v3_PhysCntrv2Cntrv(const coord_t&, - const vec_t& vsph_cntrv, - vec_t& vi_cntrv) const { - vi_cntrv[0] = vsph_cntrv[0] * dr_inv; - vi_cntrv[1] = vsph_cntrv[1] * dtheta_inv; - if constexpr (D == Dim2) { - vi_cntrv[2] = vsph_cntrv[2]; - } else { - vi_cntrv[2] = vsph_cntrv[2] * dphi_inv; - } - } - - /** - * Vector conversion from covariant to spherical covariant. - * - * @param xi coordinate array in code units - * @param vi_cov vector in covariant basis - * @param vsph_cov vector in spherical covariant basis - */ - Inline void v3_Cov2PhysCov(const coord_t&, - const vec_t& vi_cov, - vec_t& vsph_cov) const { - vsph_cov[0] = vi_cov[0] * dr_inv; - vsph_cov[1] = vi_cov[1] * dtheta_inv; - if constexpr (D == Dim2) { - vsph_cov[2] = vi_cov[2]; - } else { - vsph_cov[2] = vi_cov[2] * dphi_inv; - } - } - - /** - * Vector conversion from covariant to spherical covariant. 
- * - * @param xi coordinate array in code units - * @param vsph_cov vector in spherical covariant basis - * @param vi_cov vector in covariant basis - */ - Inline void v3_PhysCov2Cov(const coord_t&, - const vec_t& vsph_cov, - vec_t& vi_cov) const { - vi_cov[0] = vsph_cov[0] * dr; - vi_cov[1] = vsph_cov[1] * dtheta; - if constexpr (D == Dim2) { - vi_cov[2] = vsph_cov[2]; - } else { - vi_cov[2] = vsph_cov[2] * dphi; - } - } - }; - -} // namespace ntt - -#endif diff --git a/legacy/src/framework/utils/current_filter.cpp b/legacy/src/framework/utils/current_filter.cpp deleted file mode 100644 index f9d1429c3..000000000 --- a/legacy/src/framework/utils/current_filter.cpp +++ /dev/null @@ -1,111 +0,0 @@ -#include "wrapper.h" -// #include "current_filter.hpp" - -namespace ntt { - - // template <> - // void CurrentFilter::synchronizeGhostZones() const { - // auto ni {m_mesh.Ni1()}; - // auto mesh {m_mesh}; - // if (mesh.boundaries[0] == BoundaryCondition::PERIODIC) { - // Kokkos::parallel_for( - // "1d_gh_x1m", mesh.rangeCells({CellLayer::minGhostLayer}), Lambda(index_t i) { - // m_cur(i, cur::jx1) = m_cur(i + ni, cur::jx1); - // m_cur(i, cur::jx2) = m_cur(i + ni, cur::jx2); - // m_cur(i, cur::jx3) = m_cur(i + ni, cur::jx3); - // }); - // Kokkos::parallel_for( - // "1d_gh_x1p", mesh.rangeCells({CellLayer::maxGhostLayer}), Lambda(index_t i) { - // m_cur(i, cur::jx1) = m_cur(i - ni, cur::jx1); - // m_cur(i, cur::jx2) = m_cur(i - ni, cur::jx2); - // m_cur(i, cur::jx3) = m_cur(i - ni, cur::jx3); - // }); - // } - // } - - // template <> - // void CurrentFilter::synchronizeGhostZones() const { - // auto ni {m_mesh.Ni1()}; - // auto nj {m_mesh.Ni2()}; - // auto mesh {this->m_mesh}; - // if (mesh.boundaries[0] == BoundaryCondition::PERIODIC) { - // Kokkos::parallel_for( - // "2d_gh_x1m", - // mesh.rangeCells({CellLayer::minGhostLayer, CellLayer::activeLayer}), - // Lambda(index_t i, index_t j) { - // m_cur(i, j, cur::jx1) = m_cur(i + ni, j, cur::jx1); - // m_cur(i, j, cur::jx2) = 
m_cur(i + ni, j, cur::jx2); - // m_cur(i, j, cur::jx3) = m_cur(i + ni, j, cur::jx3); - // }); - // Kokkos::parallel_for( - // "2d_gh_x1p", - // mesh.rangeCells({CellLayer::maxGhostLayer, CellLayer::activeLayer}), - // Lambda(index_t i, index_t j) { - // m_cur(i, j, cur::jx1) = m_cur(i - ni, j, cur::jx1); - // m_cur(i, j, cur::jx2) = m_cur(i - ni, j, cur::jx2); - // m_cur(i, j, cur::jx3) = m_cur(i - ni, j, cur::jx3); - // }); - // } - // if (mesh.boundaries[1] == BoundaryCondition::PERIODIC) { - // Kokkos::parallel_for( - // "2d_gh_x2m", - // mesh.rangeCells({CellLayer::activeLayer, CellLayer::minGhostLayer}), - // Lambda(index_t i, index_t j) { - // m_cur(i, j, cur::jx1) = m_cur(i, j + nj, cur::jx1); - // m_cur(i, j, cur::jx2) = m_cur(i, j + nj, cur::jx2); - // m_cur(i, j, cur::jx3) = m_cur(i, j + nj, cur::jx3); - // }); - // Kokkos::parallel_for( - // "2d_gh_x2p", - // mesh.rangeCells({CellLayer::activeLayer, CellLayer::maxGhostLayer}), - // Lambda(index_t i, index_t j) { - // m_cur(i, j, cur::jx1) = m_cur(i, j - nj, cur::jx1); - // m_cur(i, j, cur::jx2) = m_cur(i, j - nj, cur::jx2); - // m_cur(i, j, cur::jx3) = m_cur(i, j - nj, cur::jx3); - // }); - // } - // if ((mesh.boundaries[0] == BoundaryCondition::PERIODIC) - // && (mesh.boundaries[1] == BoundaryCondition::PERIODIC)) { - // Kokkos::parallel_for( - // "2d_bc_corner1", - // mesh.rangeCells({CellLayer::minGhostLayer, CellLayer::minGhostLayer}), - // Lambda(index_t i, index_t j) { - // m_cur(i, j, cur::jx1) = m_cur(i + ni, j + nj, cur::jx1); - // m_cur(i, j, cur::jx2) = m_cur(i + ni, j + nj, cur::jx2); - // m_cur(i, j, cur::jx3) = m_cur(i + ni, j + nj, cur::jx3); - // }); - // Kokkos::parallel_for( - // "2d_bc_corner2", - // mesh.rangeCells({CellLayer::minGhostLayer, CellLayer::maxGhostLayer}), - // Lambda(index_t i, index_t j) { - // m_cur(i, j, cur::jx1) = m_cur(i + ni, j - nj, cur::jx1); - // m_cur(i, j, cur::jx2) = m_cur(i + ni, j - nj, cur::jx2); - // m_cur(i, j, cur::jx3) = m_cur(i + ni, j - nj, 
cur::jx3); - // }); - // Kokkos::parallel_for( - // "2d_bc_corner3", - // mesh.rangeCells({CellLayer::maxGhostLayer, CellLayer::minGhostLayer}), - // Lambda(index_t i, index_t j) { - // m_cur(i, j, cur::jx1) = m_cur(i - ni, j + nj, cur::jx1); - // m_cur(i, j, cur::jx2) = m_cur(i - ni, j + nj, cur::jx2); - // m_cur(i, j, cur::jx3) = m_cur(i - ni, j + nj, cur::jx3); - // }); - // Kokkos::parallel_for( - // "2d_bc_corner4", - // mesh.rangeCells({CellLayer::maxGhostLayer, CellLayer::maxGhostLayer}), - // Lambda(index_t i, index_t j) { - // m_cur(i, j, cur::jx1) = m_cur(i - ni, j - nj, cur::jx1); - // m_cur(i, j, cur::jx2) = m_cur(i - ni, j - nj, cur::jx2); - // m_cur(i, j, cur::jx3) = m_cur(i - ni, j - nj, cur::jx3); - // }); - // } - // } - - // template <> - // void CurrentFilter::synchronizeGhostZones() const {} - -} // namespace ntt - -// template struct ntt::CurrentFilter; -// template struct ntt::CurrentFilter; -// template struct ntt::CurrentFilter; diff --git a/legacy/src/framework/utils/current_filter.hpp b/legacy/src/framework/utils/current_filter.hpp deleted file mode 100644 index 7720f603f..000000000 --- a/legacy/src/framework/utils/current_filter.hpp +++ /dev/null @@ -1,272 +0,0 @@ -#ifndef UTILS_CURRENT_FILTER_H -#define UTILS_CURRENT_FILTER_H - -#include "wrapper.h" -#include "meshblock/meshblock.h" - -namespace ntt { -// /** -// * @brief Digital current filtering routine. -// * @tparam D Dimension. -// */ -// template -// struct CurrentFilter { -// ndfield_t m_cur; -// ndfield_t m_cur_b; -// Mesh m_mesh; -// const unsigned short m_npasses; -// tuple_t m_size; - -// /** -// * @brief Constructor. -// * @param cur Current field. -// * @param cur0 Backup current field. -// * @param npasses Number of filter passes. 
-// */ -// CurrentFilter(const ndfield_t& cur, -// const ndfield_t& cur_b, -// const Mesh& mesh, -// const unsigned short& npasses) -// : m_cur(cur), m_cur_b(cur_b), m_mesh(mesh), m_npasses(npasses) { -// for (short d = 0; d < (short)D; ++d) { -// m_size[d] = m_mesh.Ni(d); -// } -// } - -// void apply() { -// // for (unsigned short i = 0; i < m_npasses; ++i) { -// // synchronizeGhostZones(); -// // Kokkos::deep_copy(m_cur_b, m_cur); -// // filterPass(); -// // } -// } - -// /** -// * @brief 1D implementation of the algorithm. -// * @param i1 index. -// */ -// Inline void operator()(index_t) const; -// /** -// * @brief 2D implementation of the algorithm. -// * @param i1 index. -// * @param i2 index. -// */ -// Inline void operator()(index_t, index_t) const; -// /** -// * @brief 3D implementation of the algorithm. -// * @param i1 index. -// * @param i2 index. -// * @param i3 index. -// */ -// Inline void operator()(index_t, index_t, index_t) const; - -// void filterPass() { -// #ifdef MINKOWSKI_METRIC -// Kokkos::parallel_for("filter_pass", m_mesh.rangeActiveCells(), *this); -// #else -// if constexpr (D == Dim2) { -// Kokkos::parallel_for("filter_pass", -// CreateRangePolicy({m_mesh.i1_min(), m_mesh.i2_min()}, -// {m_mesh.i1_max(), m_mesh.i2_max() + 1}), -// *this); -// } else { -// Kokkos::parallel_for("filter_pass", m_mesh.rangeActiveCells(), *this); -// } -// #endif -// } -// void synchronizeGhostZones() const; -// }; - -// #ifdef MINKOWSKI_METRIC -// template <> -// Inline void CurrentFilter::operator()(index_t i) const { -// for (auto& comp : {cur::jx1, cur::jx2, cur::jx3}) { -// m_cur(i, comp) -// = INV_2 * m_cur_b(i, comp) + INV_4 * (m_cur_b(i - 1, comp) + m_cur_b(i + 1, comp)); -// } -// } - -// template <> -// Inline void CurrentFilter::operator()(index_t i, index_t j) const { -// for (auto& comp : {cur::jx1, cur::jx2, cur::jx3}) { -// m_cur(i, j, comp) = INV_4 * m_cur_b(i, j, comp) -// + INV_8 -// * (m_cur_b(i - 1, j, comp) + m_cur_b(i + 1, j, comp) -// + 
m_cur_b(i, j - 1, comp) + m_cur_b(i, j + 1, comp)) -// + INV_16 -// * (m_cur_b(i - 1, j - 1, comp) + m_cur_b(i + 1, j + 1, comp) -// + m_cur_b(i - 1, j + 1, comp) + m_cur_b(i + 1, j - 1, comp)); -// } -// } - -// template <> -// Inline void CurrentFilter::operator()(index_t i, index_t j, index_t k) const { -// for (auto& comp : {cur::jx1, cur::jx2, cur::jx3}) { -// m_cur(i, j, k, comp) -// = INV_8 * m_cur_b(i, j, k, comp) -// + INV_16 -// * (m_cur_b(i - 1, j, k, comp) + m_cur_b(i + 1, j, k, comp) -// + m_cur_b(i, j - 1, k, comp) + m_cur_b(i, j + 1, k, comp) -// + m_cur_b(i, j, k - 1, comp) + m_cur_b(i, j, k + 1, comp)) -// + INV_32 -// * (m_cur_b(i - 1, j - 1, k, comp) + m_cur_b(i + 1, j + 1, k, comp) -// + m_cur_b(i - 1, j + 1, k, comp) + m_cur_b(i + 1, j - 1, k, comp) -// + m_cur_b(i, j - 1, k - 1, comp) + m_cur_b(i, j + 1, k + 1, comp) -// + m_cur_b(i, j, k - 1, comp) + m_cur_b(i, j, k + 1, comp) -// + m_cur_b(i - 1, j, k - 1, comp) + m_cur_b(i + 1, j, k + 1, comp) -// + m_cur_b(i - 1, j, k + 1, comp) + m_cur_b(i + 1, j, k - 1, comp)) -// + INV_64 -// * (m_cur_b(i - 1, j - 1, k - 1, comp) + m_cur_b(i + 1, j + 1, k + 1, comp) -// + m_cur_b(i - 1, j + 1, k + 1, comp) + m_cur_b(i + 1, j - 1, k - 1, comp) -// + m_cur_b(i - 1, j - 1, k + 1, comp) + m_cur_b(i + 1, j + 1, k - 1, comp) -// + m_cur_b(i - 1, j + 1, k - 1, comp) + m_cur_b(i + 1, j - 1, k + 1, comp)); -// } -// } -// #else -// template <> -// Inline void CurrentFilter::operator()(index_t) const {} - -// # define FILTER_IN_I1(ARR, COMP, I, J) \ -// INV_2*(ARR)((I), (J), (COMP)) \ -// + INV_4*((ARR)((I)-1, (J), (COMP)) + (ARR)((I) + 1, (J), (COMP))) - -// template <> -// Inline void CurrentFilter::operator()(index_t i, index_t j) const { -// const std::size_t j_min = N_GHOSTS, j_min_p1 = j_min + 1; -// const std::size_t j_max = m_size[1] + N_GHOSTS - 1, j_max_m1 = j_max - 1; -// real_t cur_ij, cur_ijp1, cur_ijm1; -// # define BELYAEV_FILTER -// // # define REGULAR_FILTER - -// # ifdef BELYAEV_FILTER -// if (j 
== j_min) { -// /* --------------------------------- r, phi --------------------------------- */ -// // ... filter in r -// cur_ij = FILTER_IN_I1(m_cur_b, cur::jx1, i, j); -// cur_ijp1 = FILTER_IN_I1(m_cur_b, cur::jx1, i, j + 1); -// // ... filter in theta -// m_cur(i, j, cur::jx1) = INV_2 * cur_ij + INV_4 * cur_ijp1; - -// // ... filter in r -// cur_ij = FILTER_IN_I1(m_cur_b, cur::jx3, i, j); -// cur_ijp1 = FILTER_IN_I1(m_cur_b, cur::jx3, i, j + 1); -// // ... filter in theta -// m_cur(i, j, cur::jx3) = INV_2 * cur_ij + INV_4 * cur_ijp1; - -// /* ---------------------------------- theta --------------------------------- */ -// // ... filter in r -// cur_ij = FILTER_IN_I1(m_cur_b, cur::jx2, i, j); -// cur_ijp1 = FILTER_IN_I1(m_cur_b, cur::jx2, i, j + 1); -// // ... filter in theta -// m_cur(i, j, cur::jx2) = INV_4 * cur_ij + INV_4 * cur_ijp1; -// } else if (j == j_min_p1) { -// /* --------------------------------- r, phi --------------------------------- */ -// // ... filter in r -// cur_ij = FILTER_IN_I1(m_cur_b, cur::jx1, i, j); -// cur_ijp1 = FILTER_IN_I1(m_cur_b, cur::jx1, i, j + 1); -// cur_ijm1 = FILTER_IN_I1(m_cur_b, cur::jx1, i, j - 1); -// // ... filter in theta -// m_cur(i, j, cur::jx1) = INV_2 * (cur_ij + cur_ijm1) + INV_4 * cur_ijp1; - -// // ... filter in r -// cur_ij = FILTER_IN_I1(m_cur_b, cur::jx3, i, j); -// cur_ijp1 = FILTER_IN_I1(m_cur_b, cur::jx3, i, j + 1); -// cur_ijm1 = FILTER_IN_I1(m_cur_b, cur::jx3, i, j - 1); -// // ... filter in theta -// m_cur(i, j, cur::jx3) = INV_2 * (cur_ij + cur_ijm1) + INV_4 * cur_ijp1; -// } else if (j == j_max_m1) { -// /* --------------------------------- r, phi --------------------------------- */ -// // ... filter in r -// cur_ij = FILTER_IN_I1(m_cur_b, cur::jx1, i, j); -// cur_ijp1 = FILTER_IN_I1(m_cur_b, cur::jx1, i, j + 1); -// cur_ijm1 = FILTER_IN_I1(m_cur_b, cur::jx1, i, j - 1); -// // ... filter in theta -// m_cur(i, j, cur::jx1) = INV_2 * (cur_ij + cur_ijp1) + INV_4 * cur_ijm1; - -// // ... 
filter in r -// cur_ij = FILTER_IN_I1(m_cur_b, cur::jx3, i, j); -// cur_ijp1 = FILTER_IN_I1(m_cur_b, cur::jx3, i, j + 1); -// cur_ijm1 = FILTER_IN_I1(m_cur_b, cur::jx3, i, j - 1); -// // ... filter in theta -// m_cur(i, j, cur::jx3) = INV_2 * (cur_ij + cur_ijp1) + INV_4 * cur_ijm1; - -// /* ---------------------------------- theta --------------------------------- */ -// // ... filter in r -// cur_ij = FILTER_IN_I1(m_cur_b, cur::jx2, i, j); -// cur_ijm1 = FILTER_IN_I1(m_cur_b, cur::jx2, i, j - 1); -// // ... filter in theta -// m_cur(i, j, cur::jx2) = INV_4 * cur_ij + INV_4 * cur_ijm1; -// } else if (j == j_max) { -// /* --------------------------------- r, phi --------------------------------- */ -// // ... filter in r -// cur_ij = FILTER_IN_I1(m_cur_b, cur::jx1, i, j); -// cur_ijp1 = FILTER_IN_I1(m_cur_b, cur::jx1, i, j + 1); -// cur_ijm1 = FILTER_IN_I1(m_cur_b, cur::jx1, i, j - 1); -// // ... filter in theta -// m_cur(i, j, cur::jx1) -// = INV_2 * m_cur_b(i, j, cur::jx1) + INV_4 * m_cur_b(i, j - 1, cur::jx1); - -// // ... filter in r -// cur_ij = FILTER_IN_I1(m_cur_b, cur::jx3, i, j); -// cur_ijp1 = FILTER_IN_I1(m_cur_b, cur::jx3, i, j + 1); -// cur_ijm1 = FILTER_IN_I1(m_cur_b, cur::jx3, i, j - 1); -// // ... filter in theta -// m_cur(i, j, cur::jx3) -// = INV_2 * m_cur_b(i, j, cur::jx3) + INV_4 * m_cur_b(i, j - 1, cur::jx3); -// } -// # elif defined(REGULAR_FILTER) -// if (j == j_min) { -// /* --------------------------------- r, phi --------------------------------- */ -// // ... filter in r -// cur_ij = FILTER_IN_I1(m_cur_b, cur::jx1, i, j); -// cur_ijp1 = FILTER_IN_I1(m_cur_b, cur::jx1, i, j + 1); -// // ... filter in theta -// m_cur(i, j, cur::jx1) = INV_2 * cur_ij + INV_2 * cur_ijp1; -// // FILTER_IN_I1(m_cur_b, cur::jx1, i, j); - -// /* ---------------------------------- theta --------------------------------- */ -// // ... filter in r -// cur_ij = FILTER_IN_I1(m_cur_b, cur::jx2, i, j); -// // ... 
filter in theta -// m_cur(i, j, cur::jx2) = INV_2 * cur_ij; -// // FILTER_IN_I1(m_cur_b, cur::jx2, i, j); -// // INV_2 * cur_ij; -// } else if (j == j_max + 1) { -// /* --------------------------------- r, phi --------------------------------- */ -// // ... filter in r -// cur_ij = FILTER_IN_I1(m_cur_b, cur::jx1, i, j); -// cur_ijm1 = FILTER_IN_I1(m_cur_b, cur::jx1, i, j - 1); -// // ... filter in theta -// m_cur(i, j, cur::jx1) -// = INV_2 * m_cur_b(i, j, cur::jx1) + INV_2 * m_cur_b(i, j - 1, cur::jx1); - -// // ... filter in r -// cur_ij = FILTER_IN_I1(m_cur_b, cur::jx2, i, j); -// // ... filter in theta -// m_cur(i, j, cur::jx2) = INV_2 * cur_ij; -// } -// # endif -// else { -// for (auto& comp : {cur::jx1, cur::jx2, cur::jx3}) { -// m_cur(i, j, comp) -// = INV_4 * m_cur_b(i, j, comp) -// + INV_8 -// * (m_cur_b(i - 1, j, comp) + m_cur_b(i + 1, j, comp) + m_cur_b(i, j - 1, comp) -// + m_cur_b(i, j + 1, comp)) -// + INV_16 -// * (m_cur_b(i - 1, j - 1, comp) + m_cur_b(i + 1, j + 1, comp) -// + m_cur_b(i - 1, j + 1, comp) + m_cur_b(i + 1, j - 1, comp)); -// } -// } -// } - -// # undef FILTER_IN_I1 - -// template <> -// Inline void CurrentFilter::operator()(index_t, index_t, index_t) const {} - -// #endif - -} // namespace ntt - -#endif \ No newline at end of file diff --git a/legacy/src/framework/utils/particle_injectors.hpp b/legacy/src/framework/utils/particle_injectors.hpp deleted file mode 100644 index c275f170a..000000000 --- a/legacy/src/framework/utils/particle_injectors.hpp +++ /dev/null @@ -1,1018 +0,0 @@ -#ifndef ARCHETYPES_PARTICLE_INJECTOR_H -#define ARCHETYPES_PARTICLE_INJECTOR_H - -#include "utilities/archetypes.hpp" - -#include - -#include "meshblock/meshblock.h" -#include "meshblock/particles.h" -#include "particle_macros.h" -#include "sim_params.h" -#include "wrapper.h" - -namespace ntt { - - /* -------------------------------------------------------------------------- */ - /* Uniform injection kernels and routines */ - /* 
-------------------------------------------------------------------------- */ - - /** - * @brief 1D particle-vectorized injection kernel - */ - template class EnDist> - struct UniformInjector1d_kernel { - UniformInjector1d_kernel(const SimulationParams& pr, - const Meshblock& mb, - const Particles& sp1, - const Particles& sp2, - const list_t& box, - const real_t&) - : params { pr } - , mblock { mb } - , species1 { sp1 } - , species2 { sp2 } - , species_index1 { sp1.index() } - , species_index2 { sp2.index() } - , offset1 { sp1.npart() } - , offset2 { sp2.npart() } - , region { box[0], box[1] } - , energy_dist { params, mblock } - , pool { *(mblock.random_pool_ptr) } {} - - Inline void operator()(index_t p) const { - typename random_number_pool_t::generator_type rand_gen = pool.get_state(); - - coord_t x { ZERO }; - vec_t v { ZERO }; - x[0] = rand_gen.frand(region[0], region[1]); - energy_dist(x, v, species_index1); - init_prtl_1d(mblock, species1, p + offset1, x[0], v[0], v[1], v[2], ONE); - energy_dist(x, v, species_index2); - init_prtl_1d(mblock, species2, p + offset2, x[0], v[0], v[1], v[2], ONE); - pool.free_state(rand_gen); - } - - private: - SimulationParams params; - Meshblock mblock; - Particles species1, species2; - const int species_index1, species_index2; - const std::size_t offset1, offset2; - EnDist energy_dist; - list_t region; - random_number_pool_t pool; - }; - - /** - * @brief 2D particle-vectorized injection kernel - */ - template class EnDist> - struct UniformInjector2d_kernel { - UniformInjector2d_kernel(const SimulationParams& pr, - const Meshblock& mb, - const Particles& sp1, - const Particles& sp2, - const list_t& box, - const real_t&) - : params { pr } - , mblock { mb } - , species1 { sp1 } - , species2 { sp2 } - , species_index1 { sp1.index() } - , species_index2 { sp2.index() } - , offset1 { sp1.npart() } - , offset2 { sp2.npart() } - , region { box[0], box[1], box[2], box[3] } - , energy_dist { params, mblock } - , pool { 
*(mblock.random_pool_ptr) } {} - - Inline void operator()(index_t p) const { - typename random_number_pool_t::generator_type rand_gen = pool.get_state(); - - coord_t x { ZERO }; - vec_t v { ZERO }; - x[0] = rand_gen.frand(region[0], region[1]); - x[1] = rand_gen.frand(region[2], region[3]); - energy_dist(x, v, species_index1); - init_prtl_2d(mblock, species1, p + offset1, x[0], x[1], v[0], v[1], v[2], ONE); - energy_dist(x, v, species_index2); - init_prtl_2d(mblock, species2, p + offset2, x[0], x[1], v[0], v[1], v[2], ONE); - pool.free_state(rand_gen); - } - - private: - SimulationParams params; - Meshblock mblock; - Particles species1, species2; - const int species_index1, species_index2; - const std::size_t offset1, offset2; - EnDist energy_dist; - list_t region; - random_number_pool_t pool; - }; - - /** - * @brief 3D particle-vectorized injection kernel - */ - template class EnDist> - struct UniformInjector3d_kernel { - UniformInjector3d_kernel(const SimulationParams& pr, - const Meshblock& mb, - const Particles& sp1, - const Particles& sp2, - const list_t& box, - const real_t&) - : params { pr } - , mblock { mb } - , species1 { sp1 } - , species2 { sp2 } - , species_index1 { sp1.index() } - , species_index2 { sp2.index() } - , offset1 { sp1.npart() } - , offset2 { sp2.npart() } - , region { box[0], box[1], box[2], box[3], box[4], box[5] } - , energy_dist { params, mblock } - , pool { *(mblock.random_pool_ptr) } {} - - Inline void operator()(index_t p) const { - typename random_number_pool_t::generator_type rand_gen = pool.get_state(); - - coord_t x { ZERO }; - vec_t v { ZERO }; - x[0] = rand_gen.frand(region[0], region[1]); - x[1] = rand_gen.frand(region[2], region[3]); - x[2] = rand_gen.frand(region[4], region[5]); - energy_dist(x, v, species_index1); - init_prtl_3d(mblock, species1, p + offset1, x[0], x[1], x[2], v[0], v[1], v[2], ONE); - energy_dist(x, v, species_index2); - init_prtl_3d(mblock, species2, p + offset2, x[0], x[1], x[2], v[0], v[1], v[2], ONE); 
- pool.free_state(rand_gen); - } - - private: - SimulationParams params; - Meshblock mblock; - Particles species1, species2; - const int species_index1, species_index2; - const std::size_t offset1, offset2; - EnDist energy_dist; - list_t region; - random_number_pool_t pool; - }; - - /** - * @brief Volumetrically uniform particle injector parallelized over particles. - * @tparam D dimension. - * @tparam S simulation engine. - * @tparam EnDist energy distribution [default = Cold]. - * - * @param params simulation parameters. - * @param mblock meshblock. - * @param species species to inject as a list. - * @param ppc_per_spec fiducial number of particles per cell per species. - * @param region region to inject particles as a list of coordinates [optional]. - * @param time current time [optional]. - */ - template class EnDist = Cold> - inline void InjectUniform(const SimulationParams& params, - Meshblock& mblock, - const std::vector& species, - const real_t& ppc_per_spec, - std::vector region = {}, - const real_t& time = ZERO) { - NTTHostErrorIf(species.size() != 2, - "Exactly two species can be injected at the same time"); - auto& sp1 = mblock.particles[species[0] - 1]; - auto& sp2 = mblock.particles[species[1] - 1]; - NTTHostErrorIf( - sp1.charge() != -sp2.charge(), - "Injected species must have the same but opposite charge: q1 = -q2"); - auto ncells = (std::size_t)(mblock.Ni1() * mblock.Ni2() * mblock.Ni3()); - real_t delta_V, full_V; - if (region.size() == 0) { - region = mblock.extent(); - } -#ifdef MINKOWSKI_METRIC - if constexpr (D == Dim1) { - delta_V = (region[1] - region[0]); - full_V = (mblock.extent()[1] - mblock.extent()[0]); - } else if constexpr (D == Dim2) { - delta_V = (region[1] - region[0]) * (region[3] - region[2]); - full_V = (mblock.extent()[1] - mblock.extent()[0]) * - (mblock.extent()[3] - mblock.extent()[2]); - } else if constexpr (D == Dim3) { - delta_V = (region[1] - region[0]) * (region[3] - region[2]) * - (region[5] - region[4]); - full_V = 
(mblock.extent()[1] - mblock.extent()[0]) * - (mblock.extent()[3] - mblock.extent()[2]) * - (mblock.extent()[5] - mblock.extent()[4]); - } -#else - if constexpr (D == Dim2) { - delta_V = (SQR(region[1]) - SQR(region[0])) * (region[3] - region[2]); - full_V = (SQR(mblock.extent()[1]) - SQR(mblock.extent()[0])) * - constant::PI * HALF; - } else if constexpr (D == Dim3) { - // !TODO: need to be a bit more careful - delta_V = (CUBE(region[1]) - CUBE(region[0])) * (region[3] - region[2]) * - (region[5] - region[4]); - full_V = (CUBE(mblock.extent()[1]) - CUBE(mblock.extent()[0])) * - (4.0 / 3.0) * constant::PI; - } -#endif - ncells = (std::size_t)((real_t)ncells * delta_V / full_V); - - auto npart_per_spec = (std::size_t)((double)(ncells * ppc_per_spec)); - list_t(D)> box { ZERO }; - for (auto i { 0 }; i < 2 * static_cast(D); ++i) { - box[i] = region[i]; - } - - if constexpr (D == Dim1) { - Kokkos::parallel_for( - "InjectUniform", - CreateRangePolicy({ 0 }, { npart_per_spec }), - UniformInjector1d_kernel(params, mblock, sp1, sp2, box, time)); - } else if constexpr (D == Dim2) { - Kokkos::parallel_for( - "InjectUniform", - CreateRangePolicy({ 0 }, { npart_per_spec }), - UniformInjector2d_kernel(params, mblock, sp1, sp2, box, time)); - } else if constexpr (D == Dim3) { - Kokkos::parallel_for( - "InjectUniform", - CreateRangePolicy({ 0 }, { npart_per_spec }), - UniformInjector3d_kernel(params, mblock, sp1, sp2, box, time)); - } - sp1.setNpart(sp1.npart() + npart_per_spec); - sp2.setNpart(sp2.npart() + npart_per_spec); - } - - /* -------------------------------------------------------------------------- */ - /* Volume injection kernels and routines */ - /* -------------------------------------------------------------------------- */ - template - class EnDist, - template - class SpDist, - template - class InjCrit> - struct VolumeInjector1d_kernel { - VolumeInjector1d_kernel(const SimulationParams& pr, - const Meshblock& mb, - const Particles& sp1, - const Particles& sp2, - 
const array_t& ind, - const real_t& ppc, - const real_t&) - : params { pr } - , mblock { mb } - , species1 { sp1 } - , species2 { sp2 } - , species_index1 { sp1.index() } - , species_index2 { sp2.index() } - , offset1 { sp1.npart() } - , offset2 { sp2.npart() } - , index { ind } - , nppc { ppc } - , use_weights { params.useWeights() } - , V0 { params.V0() } - , energy_dist { params, mblock } - , spatial_dist { params, mblock } - , inj_criterion { params, mblock } - , pool { *(mblock.random_pool_ptr) } {} - - Inline void operator()(index_t i1) const { - // cell node - coord_t xi { static_cast(static_cast(i1) - N_GHOSTS) }; - const auto weight { use_weights - ? (mblock.metric.sqrt_det_h({ xi[0] + HALF }) / V0) - : ONE }; - - random_generator_t rand_gen { pool.get_state() }; - real_t n_inject { nppc }; - coord_t xc { ZERO }; - coord_t xph { ZERO }; - prtldx_t dx1; - vec_t v { ZERO }, v_cart { ZERO }; - - while (n_inject > ZERO) { - dx1 = Random(rand_gen); - xc[0] = xi[0] + dx1; - mblock.metric.x_Code2Cart(xc, xph); - if ((Random(rand_gen) < n_inject) && // # of prtls - inj_criterion(xph) && // injection criterion - (Random(rand_gen) < spatial_dist(xph)) // spatial distribution - ) { - auto p { Kokkos::atomic_fetch_add(&index(), 1) }; - - energy_dist(xph, v, species_index1); - v_cart[0] = v[0]; - v_cart[1] = v[1]; - v_cart[2] = v[2]; - init_prtl_1d_i_di(species1, - offset1 + p, - static_cast(i1) - N_GHOSTS, - dx1, - v_cart[0], - v_cart[1], - v_cart[2], - weight); - - energy_dist(xph, v, species_index2); - v_cart[0] = v[0]; - v_cart[1] = v[1]; - v_cart[2] = v[2]; - init_prtl_1d_i_di(species2, - offset2 + p, - static_cast(i1) - N_GHOSTS, - dx1, - v_cart[0], - v_cart[1], - v_cart[2], - weight); - } - n_inject -= ONE; - } - pool.free_state(rand_gen); - } - - private: - SimulationParams params; - Meshblock mblock; - Particles species1, species2; - const int species_index1, species_index2; - const std::size_t offset1, offset2; - array_t index; - const real_t nppc; - const 
bool use_weights; - const real_t V0; - EnDist energy_dist; - SpDist spatial_dist; - InjCrit inj_criterion; - random_number_pool_t pool; - }; - - template - class EnDist, - template - class SpDist, - template - class InjCrit> - struct VolumeInjector2d_kernel { - VolumeInjector2d_kernel(const SimulationParams& pr, - const Meshblock& mb, - const Particles& sp1, - const Particles& sp2, - const array_t& ind, - const real_t& ppc, - const real_t&) - : params { pr } - , mblock { mb } - , species1 { sp1 } - , species2 { sp2 } - , species_index1 { sp1.index() } - , species_index2 { sp2.index() } - , offset1 { sp1.npart() } - , offset2 { sp2.npart() } - , index { ind } - , nppc { ppc } - , use_weights { params.useWeights() } - , V0 { params.V0() } - , energy_dist { params, mblock } - , spatial_dist { params, mblock } - , inj_criterion { params, mblock } - , pool { *(mblock.random_pool_ptr) } {} - - Inline void operator()(index_t i1, index_t i2) const { - // cell node - coord_t xi { COORD(i1), COORD(i2) }; - const auto weight { - use_weights - ? 
(mblock.metric.sqrt_det_h({ xi[0] + HALF, xi[1] + HALF }) / V0) - : ONE - }; - - random_generator_t rand_gen { pool.get_state() }; - real_t n_inject { nppc }; - coord_t xc { ZERO }; - coord_t xph { ZERO }; - prtldx_t dx1, dx2; - vec_t v { ZERO }, v_cart { ZERO }; - - while (n_inject > ZERO) { - dx1 = Random(rand_gen); - dx2 = Random(rand_gen); - xc[0] = xi[0] + dx1; - xc[1] = xi[1] + dx2; -#ifdef MINKOWSKI_METRIC - mblock.metric.x_Code2Cart(xc, xph); -#else - mblock.metric.x_Code2Sph(xc, xph); -#endif - if ((Random(rand_gen) < n_inject) && // # of prtls - inj_criterion(xph) && // injection criterion - (Random(rand_gen) < spatial_dist(xph)) // spatial distribution - ) { - auto p { Kokkos::atomic_fetch_add(&index(), 1) }; - - energy_dist(xph, v, species_index1); -#ifdef MINKOWSKI_METRIC - v_cart[0] = v[0]; - v_cart[1] = v[1]; - v_cart[2] = v[2]; -#else - mblock.metric.v3_Hat2Cart({ xc[0], xc[1], ZERO }, v, v_cart); -#endif - init_prtl_2d_i_di(species1, - offset1 + p, - COORD(i1), - COORD(i2), - dx1, - dx2, - v_cart[0], - v_cart[1], - v_cart[2], - weight); - - energy_dist(xph, v, species_index2); -#ifdef MINKOWSKI_METRIC - v_cart[0] = v[0]; - v_cart[1] = v[1]; - v_cart[2] = v[2]; -#else - mblock.metric.v3_Hat2Cart({ xc[0], xc[1], ZERO }, v, v_cart); -#endif - init_prtl_2d_i_di(species2, - offset2 + p, - COORD(i1), - COORD(i2), - dx1, - dx2, - v_cart[0], - v_cart[1], - v_cart[2], - weight); - } - n_inject -= ONE; - } - pool.free_state(rand_gen); - } - - private: - SimulationParams params; - Meshblock mblock; - Particles species1, species2; - const int species_index1, species_index2; - const std::size_t offset1, offset2; - array_t index; - const real_t nppc; - const bool use_weights; - const real_t V0; - EnDist energy_dist; - SpDist spatial_dist; - InjCrit inj_criterion; - random_number_pool_t pool; - }; - - template - class EnDist, - template - class SpDist, - template - class InjCrit> - struct VolumeInjector3d_kernel { - VolumeInjector3d_kernel(const SimulationParams& 
pr, - const Meshblock& mb, - const Particles& sp1, - const Particles& sp2, - const array_t& ind, - const real_t& ppc, - const real_t&) - : params { pr } - , mblock { mb } - , species1 { sp1 } - , species2 { sp2 } - , species_index1 { sp1.index() } - , species_index2 { sp2.index() } - , offset1 { sp1.npart() } - , offset2 { sp2.npart() } - , index { ind } - , nppc { ppc } - , use_weights { params.useWeights() } - , V0 { params.V0() } - , energy_dist { params, mblock } - , spatial_dist { params, mblock } - , inj_criterion { params, mblock } - , pool { *(mblock.random_pool_ptr) } {} - - Inline void operator()(index_t i1, index_t i2, index_t i3) const { - // cell node - coord_t xi { static_cast(static_cast(i1) - N_GHOSTS), - static_cast(static_cast(i2) - N_GHOSTS), - static_cast(static_cast(i3) - N_GHOSTS) }; - const auto weight { use_weights - ? (mblock.metric.sqrt_det_h( - { xi[0] + HALF, xi[1] + HALF, xi[2] + HALF }) / - V0) - : ONE }; - - random_generator_t rand_gen { pool.get_state() }; - real_t n_inject { nppc }; - coord_t xc { ZERO }; - coord_t xph { ZERO }; - prtldx_t dx1, dx2, dx3; - vec_t v { ZERO }, v_cart { ZERO }; - - while (n_inject > ZERO) { - dx1 = Random(rand_gen); - dx2 = Random(rand_gen); - dx3 = Random(rand_gen); - xc[0] = xi[0] + dx1; - xc[1] = xi[1] + dx2; - xc[2] = xi[2] + dx3; -#ifdef MINKOWSKI_METRIC - mblock.metric.x_Code2Cart(xc, xph); -#else - mblock.metric.x_Code2Sph(xc, xph); -#endif - if ((Random(rand_gen) < n_inject) && // # of prtls - inj_criterion(xph) && // injection criterion - (Random(rand_gen) < spatial_dist(xph)) // spatial distribution - ) { - auto p { Kokkos::atomic_fetch_add(&index(), 1) }; - - energy_dist(xph, v, species_index1); -#ifdef MINKOWSKI_METRIC - v_cart[0] = v[0]; - v_cart[1] = v[1]; - v_cart[2] = v[2]; -#else - mblock.metric.v3_Hat2Cart({ xc[0], xc[1], xc[2] }, v, v_cart); -#endif - init_prtl_3d_i_di(species1, - offset1 + p, - static_cast(i1) - N_GHOSTS, - static_cast(i2) - N_GHOSTS, - static_cast(i3) - N_GHOSTS, - 
dx1, - dx2, - dx3, - v_cart[0], - v_cart[1], - v_cart[2], - weight); - - energy_dist(xph, v, species_index2); -#ifdef MINKOWSKI_METRIC - v_cart[0] = v[0]; - v_cart[1] = v[1]; - v_cart[2] = v[2]; -#else - mblock.metric.v3_Hat2Cart({ xc[0], xc[1], xc[2] }, v, v_cart); -#endif - init_prtl_3d_i_di(species2, - offset2 + p, - static_cast(i1) - N_GHOSTS, - static_cast(i2) - N_GHOSTS, - static_cast(i3) - N_GHOSTS, - dx1, - dx2, - dx3, - v_cart[0], - v_cart[1], - v_cart[2], - weight); - } - n_inject -= ONE; - } - pool.free_state(rand_gen); - } - - private: - SimulationParams params; - Meshblock mblock; - Particles species1, species2; - const int species_index1, species_index2; - const std::size_t offset1, offset2; - array_t index; - const real_t nppc; - const bool use_weights; - const real_t V0; - EnDist energy_dist; - SpDist spatial_dist; - InjCrit inj_criterion; - random_number_pool_t pool; - }; - - /** - * @brief Particle injector parallelized by cells in a volume. - * @tparam D dimension. - * @tparam S simulation engine. - * @tparam EnDist energy distribution [default = Cold]. - * @tparam SpDist spatial distribution [default = Uniform]. - * @tparam InjCrit injection criterion [default = NoCriterion]. - * - * @param params simulation parameters. - * @param mblock meshblock. - * @param species species to inject as a list. - * @param ppc_per_spec fiducial number of particles per cell per species. - * @param region region to inject particles as a list of coordinates [optional]. - * @param time current time [optional]. 
- */ - template class EnDist = Cold, - template class SpDist = Uniform, - template class InjCrit = NoCriterion> - inline void InjectInVolume(const SimulationParams& params, - Meshblock& mblock, - const std::vector& species, - const real_t& ppc_per_spec, - std::vector region = {}, - const real_t& time = ZERO) { - range_t range_policy; - if (region.size() == 0) { - range_policy = mblock.rangeActiveCells(); - } else if (region.size() == 2 * static_cast(D)) { - tuple_t region_min; - tuple_t region_max; - coord_t xmin_ph { ZERO }, xmax_ph { ZERO }; - coord_t xmin_cu { ZERO }, xmax_cu { ZERO }; - for (short i = 0; i < static_cast(D); ++i) { - xmin_ph[i] = region[2 * i]; - xmax_ph[i] = region[2 * i + 1]; - } - mblock.metric.x_Phys2Code(xmin_ph, xmin_cu); - mblock.metric.x_Phys2Code(xmax_ph, xmax_cu); - for (short i = 0; i < static_cast(D); ++i) { - region_min[i] = static_cast(xmin_cu[i]); - region_max[i] = static_cast(xmax_cu[i]); - } - range_policy = CreateRangePolicy(region_min, region_max); - } else { - NTTHostError("region must be empty or have 2 * D elements"); - } - - NTTHostErrorIf(species.size() != 2, - "Exactly two species can be injected at the same time"); - auto& sp1 = mblock.particles[species[0] - 1]; - auto& sp2 = mblock.particles[species[1] - 1]; - NTTHostErrorIf( - sp1.charge() != -sp2.charge(), - "Injected species must have the same but opposite charge: q1 = -q2"); - array_t ind("ind_inj"); - if constexpr (D == Dim1) { - Kokkos::parallel_for( - "InjectInVolume", - range_policy, - VolumeInjector1d_kernel(params, - mblock, - sp1, - sp2, - ind, - ppc_per_spec, - time)); - } else if constexpr (D == Dim2) { - Kokkos::parallel_for( - "InjectInVolume", - range_policy, - VolumeInjector2d_kernel(params, - mblock, - sp1, - sp2, - ind, - ppc_per_spec, - time)); - } else if constexpr (D == Dim3) { - Kokkos::parallel_for( - "InjectInVolume", - range_policy, - VolumeInjector3d_kernel(params, - mblock, - sp1, - sp2, - ind, - ppc_per_spec, - time)); - } - - auto ind_h = 
Kokkos::create_mirror(ind); - Kokkos::deep_copy(ind_h, ind); - sp1.setNpart(sp1.npart() + ind_h()); - sp2.setNpart(sp2.npart() + ind_h()); - } - - /* -------------------------------------------------------------------------- */ - - template - class EnDist, - template - class InjCrit> - struct NonUniformInjector1d_kernel { - NonUniformInjector1d_kernel(const SimulationParams& pr, - const Meshblock& mb, - const Particles& sp1, - const Particles& sp2, - const array_t& ind, - const ndarray_t<1>& ppc, - const real_t&) - : params { pr } - , mblock { mb } - , species1 { sp1 } - , species2 { sp2 } - , species_index1 { sp1.index() } - , species_index2 { sp2.index() } - , offset1 { sp1.npart() } - , offset2 { sp2.npart() } - , index { ind } - , ppc_per_spec { ppc } - , use_weights { params.useWeights() } - , energy_dist { params, mblock } - , inj_criterion { params, mblock } - , pool { *(mblock.random_pool_ptr) } {} - - Inline void operator()(index_t i1) const { - // cell node - const auto i1_ = static_cast(i1) - N_GHOSTS; - coord_t xi = { static_cast(i1_) }; - - random_generator_t rand_gen { pool.get_state() }; - real_t n_inject { ppc_per_spec(i1_) }; - coord_t xc { ZERO }; - coord_t xph { ZERO }; - prtldx_t dx1; - vec_t v { ZERO }, v_cart { ZERO }; - - while (n_inject > ZERO) { - dx1 = Random(rand_gen); - xc[0] = xi[0] + dx1; - mblock.metric.x_Code2Phys(xc, xph); - if ((Random(rand_gen) < n_inject) && // # of prtls - inj_criterion(xph) // injection criterion - ) { - auto p { Kokkos::atomic_fetch_add(&index(), 1) }; - - energy_dist(xph, v, species_index1); - v_cart[0] = v[0]; - v_cart[1] = v[1]; - v_cart[2] = v[2]; - init_prtl_1d_i_di(species1, - offset1 + p, - i1_, - dx1, - v_cart[0], - v_cart[1], - v_cart[2], - ONE); - - energy_dist(xph, v, species_index2); - v_cart[0] = v[0]; - v_cart[1] = v[1]; - v_cart[2] = v[2]; - init_prtl_1d_i_di(species2, - offset2 + p, - i1_, - dx1, - v_cart[0], - v_cart[1], - v_cart[2], - ONE); - } - n_inject -= ONE; - } - 
pool.free_state(rand_gen); - } - - private: - SimulationParams params; - Meshblock mblock; - Particles species1, species2; - const int species_index1, species_index2; - const std::size_t offset1, offset2; - array_t index; - ndarray_t<1> ppc_per_spec; - const bool use_weights; - EnDist energy_dist; - InjCrit inj_criterion; - random_number_pool_t pool; - }; - - template - class EnDist, - template - class InjCrit> - struct NonUniformInjector2d_kernel { - NonUniformInjector2d_kernel(const SimulationParams& pr, - const Meshblock& mb, - const Particles& sp1, - const Particles& sp2, - const array_t& ind, - const ndarray_t<2>& ppc, - const real_t&) - : params { pr } - , mblock { mb } - , species1 { sp1 } - , species2 { sp2 } - , species_index1 { sp1.index() } - , species_index2 { sp2.index() } - , offset1 { sp1.npart() } - , offset2 { sp2.npart() } - , index { ind } - , ppc_per_spec { ppc } - , use_weights { params.useWeights() } - , V0 { params.V0() } - , energy_dist { params, mblock } - , inj_criterion { params, mblock } - , pool { *(mblock.random_pool_ptr) } {} - - Inline void operator()(index_t i1, index_t i2) const { - // cell node - const auto i1_ = i1 - static_cast(N_GHOSTS); - const auto i2_ = i2 - static_cast(N_GHOSTS); - coord_t xi = { static_cast(i1_), static_cast(i2_) }; - const auto weight { - use_weights - ? 
(mblock.metric.sqrt_det_h({ xi[0] + HALF, xi[1] + HALF }) / V0) - : ONE - }; - - random_generator_t rand_gen { pool.get_state() }; - real_t n_inject { ppc_per_spec(i1_, i2_) }; - coord_t xc { ZERO }; - coord_t xph { ZERO }; - prtldx_t dx1, dx2; - vec_t v { ZERO }, v_cart { ZERO }; - - while (n_inject > ZERO) { - dx1 = Random(rand_gen); - dx2 = Random(rand_gen); - xc[0] = xi[0] + dx1; - xc[1] = xi[1] + dx2; - mblock.metric.x_Code2Phys(xc, xph); - if ((Random(rand_gen) < n_inject) && // # of prtls - inj_criterion(xph) // injection criterion - ) { - auto p { Kokkos::atomic_fetch_add(&index(), 1) }; - - energy_dist(xph, v, species_index1); -#ifdef MINKOWSKI_METRIC - v_cart[0] = v[0]; - v_cart[1] = v[1]; - v_cart[2] = v[2]; -#elif defined(GRPIC_ENGINE) - mblock.metric.v3_Hat2Cov({ xc[0], xc[1] }, v, v_cart); -#else - mblock.metric.v3_Hat2Cart({ xc[0], xc[1], ZERO }, v, v_cart); -#endif - init_prtl_2d_i_di(species1, - offset1 + p, - i1_, - i2_, - dx1, - dx2, - v_cart[0], - v_cart[1], - v_cart[2], - weight); - - energy_dist(xph, v, species_index2); -#ifdef MINKOWSKI_METRIC - v_cart[0] = v[0]; - v_cart[1] = v[1]; - v_cart[2] = v[2]; -#elif defined(GRPIC_ENGINE) - mblock.metric.v3_Hat2Cov({ xc[0], xc[1] }, v, v_cart); -#else - mblock.metric.v3_Hat2Cart({ xc[0], xc[1], ZERO }, v, v_cart); -#endif - init_prtl_2d_i_di(species2, - offset2 + p, - i1_, - i2_, - dx1, - dx2, - v_cart[0], - v_cart[1], - v_cart[2], - weight); - } - n_inject -= ONE; - } - pool.free_state(rand_gen); - } - - private: - SimulationParams params; - Meshblock mblock; - Particles species1, species2; - const int species_index1, species_index2; - const std::size_t offset1, offset2; - array_t index; - ndarray_t<2> ppc_per_spec; - const bool use_weights; - const real_t V0; - EnDist energy_dist; - InjCrit inj_criterion; - random_number_pool_t pool; - }; - - /** - * @brief Particle injector parallelized by cells in a volume ... - * @brief ... up to certain number density. - * @tparam D dimension. 
- * @tparam S simulation engine. - * @tparam EnDist energy distribution [default = Cold]. - * @tparam InjCrit injection criterion [default = NoCriterion]. - * - * @param params simulation parameters. - * @param mblock meshblock. - * @param species species to inject as a list. - * @param ppc_per_spec target injection ppc per species. - * @param region region to inject particles as a list of coordinates [optional]. - * @param time current time [optional]. - */ - template class EnDist = Cold, - template class InjCrit = NoCriterion> - inline void InjectNonUniform(const SimulationParams& params, - Meshblock& mblock, - const std::vector& species, - const ndarray_t<(short)(D)>& ppc_per_spec, - std::vector region = {}, - const real_t& time = ZERO) { - EnDist energy_dist(params, mblock); - InjCrit inj_criterion(params, mblock); - range_t range_policy; - if (region.size() == 0) { - range_policy = mblock.rangeActiveCells(); - } else if (region.size() == 2 * static_cast(D)) { - tuple_t region_min; - tuple_t region_max; - coord_t xmin_ph { ZERO }, xmax_ph { ZERO }; - coord_t xmin_cu { ZERO }, xmax_cu { ZERO }; - for (short i = 0; i < static_cast(D); ++i) { - xmin_ph[i] = region[2 * i]; - xmax_ph[i] = region[2 * i + 1]; - } - mblock.metric.x_Phys2Code(xmin_ph, xmin_cu); - mblock.metric.x_Phys2Code(xmax_ph, xmax_cu); - for (short i = 0; i < static_cast(D); ++i) { - region_min[i] = static_cast(xmin_cu[i]); - region_max[i] = static_cast(xmax_cu[i]); - } - range_policy = CreateRangePolicy(region_min, region_max); - } else { - NTTHostError("region must be empty or have 2 * D elements"); - } - - NTTHostErrorIf(species.size() != 2, - "Exactly two species can be injected at the same time"); - auto& sp1 = mblock.particles[species[0] - 1]; - auto& sp2 = mblock.particles[species[1] - 1]; - NTTHostErrorIf( - sp1.charge() != -sp2.charge(), - "Injected species must have the same but opposite charge: q1 = -q2"); - array_t ind("ind_inj"); - if constexpr (D == Dim1) { - Kokkos::parallel_for( - 
"InjectNonUniform", - range_policy, - NonUniformInjector1d_kernel(params, - mblock, - sp1, - sp2, - ind, - ppc_per_spec, - time)); - } else if constexpr (D == Dim2) { - Kokkos::parallel_for( - "InjectNonUniform", - range_policy, - NonUniformInjector2d_kernel(params, - mblock, - sp1, - sp2, - ind, - ppc_per_spec, - time)); - } else if constexpr (D == Dim3) { - NTTHostError("Not implemented"); - } - - auto ind_h = Kokkos::create_mirror(ind); - Kokkos::deep_copy(ind_h, ind); - sp1.setNpart(sp1.npart() + ind_h()); - sp2.setNpart(sp2.npart() + ind_h()); - } - -} // namespace ntt - -#endif // ARCHETYPES_PARTICLE_INJECTOR_H diff --git a/legacy/src/framework/utils/timer.cpp b/legacy/src/framework/utils/timer.cpp deleted file mode 100644 index ba285cedf..000000000 --- a/legacy/src/framework/utils/timer.cpp +++ /dev/null @@ -1,120 +0,0 @@ -#include "wrapper.h" -#include "timer.h" - -#include -#include -#include -#include -#include -#include - -namespace ntt { - - auto TimeUnit::getMultiplier() const -> double { return multiplier; } - auto operator<<(std::ostream& os, const TimeUnit& v) -> std::ostream& { - return os << v.unitname; - } - - Time::Time(const long double& v, const TimeUnit& u) { - value = static_cast(v); - unit = &u; - } - // auto Time::getValue() const -> long double { return value; } - // void Time::convert(const TimeUnit& to) { - // if (&to != unit) { - // value = value * unit->getMultiplier() / to.getMultiplier(); - // unit = &to; - // } - // } - auto Time::represent(const TimeUnit& to) const -> Time { - if (&to != unit) { - return Time(value * unit->getMultiplier() / to.getMultiplier(), to); - } else { - return Time(value, to); - } - } - auto operator<<(std::ostream& os, const Time& t) -> std::ostream& { - return os << t.value << " " << *(t.unit); - } - // auto Time::operator-() const -> Time { return Time(-(this->value), *(this->unit)); } - // auto operator+(const Time& t1, const Time& t2) -> Time { - // if (t1.unit == t2.unit) { - // return 
Time(static_cast(t1.value + t2.value), *(t1.unit)); - // } else { - // const TimeUnit* main_unit; - // if (t1.unit->getMultiplier() < t2.unit->getMultiplier()) - // main_unit = t1.unit; - // else - // main_unit = t2.unit; - // return Time(static_cast(t1.represent(*main_unit).value - // + t2.represent(*main_unit).value), - // *(main_unit)); - // } - // } - // auto operator-(const Time& t1, const Time& t2) -> Time { return (t1 + (-t2)); } - // auto operator*(double x, const Time& t) -> Time { return Time(t.value * x, *(t.unit)); } - // auto operator*(const Time& t, double x) -> Time { return Time(t.value * x, *(t.unit)); } - - namespace { // anonymous namespace - void timeNow(TimeContainer& time) { time = std::chrono::system_clock::now(); } - void timeElapsed(TimeContainer& time_start, Time& time_elapsed) { - long double dt; - dt = std::chrono::duration(std::chrono::system_clock::now() - time_start) - .count(); - time_elapsed = Time(dt, second); - } - } // namespace - - void Timer::start() { - init = true; - on = true; - timeNow(t_start); - } - void Timer::check() { - assert(init && "# Error: timer is not initialized."); - assert(on && "# Error: timer is not running."); - timeElapsed(t_start, t_elapsed); - } - void Timer::stop() { - check(); - on = false; - } - auto Timer::getElapsedIn(const TimeUnit& u) const -> long double { - if (init) { - return t_elapsed.represent(u).value; - } else { - return 0.0; - } - } - auto Timer::getName() const -> std::string { return name; } - void Timer::printElapsed(std::ostream& os, const TimeUnit& u) const { - auto repr = t_elapsed.represent(u); - os << std::setw(25) << std::left << "timer `" + name + "`" - << ": " << repr; - if (on) { os << " (and running)"; } - } - void Timer::printElapsed(const TimeUnit& u) const { printElapsed(std::cout, u); } - - // TimerCollection::TimerCollection(const std::vector& timers) { - // for (auto& t : timers) { - // m_timers.push_back(Timer(t)); - // // m_timers.emplace_back(Timer(t)); - // } - // } 
- - void TimerCollection::start(const int& i) { m_timers[i - 1].start(); } - - void TimerCollection::stop(const int& i) { m_timers[i - 1].stop(); } - - void TimerCollection::printAll(std::ostream& os, const TimeUnit& u) const { - os << "==============================" << std::endl; - for (auto& t : m_timers) { - t.printElapsed(os, u); - os << std::endl; - } - os << "------------------------------" << std::endl; - } - - void TimerCollection::printAll(const TimeUnit& u) const { printAll(std::cout, u); } - -} // namespace ntt diff --git a/legacy/src/framework/utils/timer.h b/legacy/src/framework/utils/timer.h deleted file mode 100644 index 5793ecede..000000000 --- a/legacy/src/framework/utils/timer.h +++ /dev/null @@ -1,95 +0,0 @@ -#ifndef UTILS_TIMER_H -#define UTILS_TIMER_H - -#include -#include -#include -#include - -namespace ntt { - // Type to be used for s/ms/us/ms - class TimeUnit { - private: - double multiplier; - std::string unitname; - - public: - TimeUnit() = default; - TimeUnit(const double& mult, const std::string& unit) - : multiplier {mult}, unitname {unit} {} - ~TimeUnit() = default; - [[nodiscard]] auto getMultiplier() const -> double; - friend auto operator<<(std::ostream& os, TimeUnit const& v) -> std::ostream&; - }; - - // declaration of s/ms/us/ms - inline const TimeUnit second(1, "s"); - inline const TimeUnit millisecond(1e-3, "ms"); - inline const TimeUnit microsecond(1e-6, "us"); - inline const TimeUnit nanosecond(1e-9, "ns"); - - // Type to keep track of timestamp - class Time { - private: - long double value {0.0}; - const TimeUnit* unit; - - public: - Time() = default; - Time(const long double& v, const TimeUnit& u = second); - ~Time() = default; - // void convert(const TimeUnit& to); - // [[nodiscard]] auto getValue() const -> long double; - [[nodiscard]] auto represent(const TimeUnit& to) const -> Time; - // Time operator=(const Time & rhs); - auto operator-() const -> Time; - - // friend auto operator+(Time const&, Time const&) -> Time; 
- // friend auto operator-(Time const&, Time const&) -> Time; - // friend auto operator*(double x, Time const& t) -> Time; - // friend auto operator*(Time const&, double x) -> Time; - friend auto operator<<(std::ostream& os, Time const& t) -> std::ostream&; - friend class Timer; - }; - - using TimeContainer = std::chrono::time_point; - - class Timer { - private: - std::string name; - bool init {false}; - bool on {false}; - TimeContainer t_start; - Time t_elapsed; - - public: - Timer() : name("NULL"), t_elapsed {0.0} {} - Timer(const std::string& name) : name(std::move(name)), t_elapsed {0.0} {} - ~Timer() = default; - void start(); - void check(); - void stop(); - [[nodiscard]] auto getElapsedIn(const TimeUnit& u) const -> long double; - [[nodiscard]] auto getName() const -> std::string; - void printElapsed(const TimeUnit& u = second) const; - void printElapsed(std::ostream& os = std::cout, const TimeUnit& u = second) const; - }; - - class TimerCollection { - private: - // TODO: maybe map? 
- std::vector m_timers; - - public: - // TimerCollection(const std::vector& timers); - TimerCollection(const std::vector& timers) : m_timers(timers) {} - ~TimerCollection() = default; - void start(const int& i); - void stop(const int& i); - void printAll(std::ostream& os = std::cout, const TimeUnit& u = second) const; - void printAll(const TimeUnit& u = second) const; - }; - -} // namespace ntt - -#endif // TIMER_H diff --git a/legacy/src/framework/writer.cpp b/legacy/src/framework/writer.cpp deleted file mode 100644 index 1ef56273e..000000000 --- a/legacy/src/framework/writer.cpp +++ /dev/null @@ -1,68 +0,0 @@ - -if constexpr (D == Dim1) { - extent = fmt::format("{} {} 0 0 0 0", params.extent()[0], params.extent()[1]); -} else if constexpr (D == Dim2) { - extent = fmt::format("{} {} {} {} 0 0", - params.extent()[0], - params.extent()[1], - params.extent()[2], - params.extent()[3]); -} else if constexpr (D == Dim3) { - extent = fmt::format("{} {} {} {} {} {}", - params.extent()[0], - params.extent()[1], - params.extent()[2], - params.extent()[3], - params.extent()[4], - params.extent()[5]); -} - -const std::string vtk_xml = R"( - - - " - - - - - step - - - - - )"; - -if constexpr (D == Dim1) { - extent = fmt::format("0 {} 0 0 0 0", params.resolution()[0] + 1); -} else if constexpr (D == Dim2) { - extent - = fmt::format("0 {} 0 {} 0 0", params.resolution()[0] + 1, params.resolution()[1] + - 1); -} else if constexpr (D == Dim3) { - extent = fmt::format("0 {} 0 {} 0 {}", - params.resolution()[0] + 1, - params.resolution()[1] + 1, - params.resolution()[2] + 1); -} -const std::string vtk_xml = R"( - - - - - - - - time - - - - - )"; - -std::cout << vtk_xml << std::endl; - -m_io.DefineAttribute("vtk.xml", vtk_xml); diff --git a/legacy/src/grpic/boundaries/fields_bc.cpp b/legacy/src/grpic/boundaries/fields_bc.cpp deleted file mode 100644 index 239bf7655..000000000 --- a/legacy/src/grpic/boundaries/fields_bc.cpp +++ /dev/null @@ -1,82 +0,0 @@ - -// !LEGACY -// // theta = 0 
boundary -// Kokkos::parallel_for( -// "2d_bc_theta0", -// CreateRangePolicy({0, 0}, {m_mblock.i1_max() + N_GHOSTS, m_mblock.i2_min() + 1}), -// Lambda(index_t i, index_t j) { -// // mblock.em0(i, j, em::ex3) = ZERO; -// // mblock.em(i, j, em::ex3) = ZERO; -// }); - -// // theta = pi boundary -// Kokkos::parallel_for( -// "2d_bc_thetaPi", -// CreateRangePolicy({0, m_mblock.i2_max()}, {m_mblock.i1_max() + N_GHOSTS, -// m_mblock.i2_max() + N_GHOSTS}), Lambda(index_t i, index_t j) { -// mblock.em0(i, j, em::ex3) = ZERO; - -// mblock.em(i, j, em::ex3) = ZERO; -// }); - -// auto j_min {mblock.i2_min()}; -// // Kokkos::parallel_for( -// // "2d_bc_theta0", CreateRangePolicy({mblock.i1_min() - 1}, {mblock.i1_max()}), -// Lambda(index_t i) { -// // // mblock.em0(i, j_min, em::ex3) = ZERO; -// // // mblock.em(i, j_min, em::ex3) = ZERO; -// // mblock.em0(i, j_min - 1, em::ex2) = -mblock.em0(i, j_min, em::ex2); -// // mblock.em(i, j_min - 1, em::ex2) = -mblock.em(i, j_min, em::ex2); -// // }); - -// // // theta = pi boundary -// // auto j_max {mblock.i2_max()}; -// // Kokkos::parallel_for( -// // "2d_bc_thetaPi", CreateRangePolicy({mblock.i1_min() - 1}, {m_mblock.i1_max()}), -// Lambda(index_t i) { -// // // mblock.em0(i, j_max, em::ex3) = ZERO; -// // // mblock.em(i, j_max, em::ex3) = ZERO; -// // mblock.em0(i, j_max, em::ex2) = mblock.em0(i, j_max - 1, em::ex2); -// // mblock.em(i, j_max, em::ex2) = mblock.em(i, j_max - 1, em::ex2); -// // }); - -// auto pGen {this->m_pGen}; -// auto br_func {&(this->m_pGen.userTargetField_br_cntrv)}; -// Kokkos::parallel_for( -// "2d_absorbing bc", -// CreateRangePolicy({mblock.i1_min(), mblock.i2_min()}, {mblock.i1_max() + 1, -// mblock.i2_max() + 1}), Lambda(index_t i, index_t j) { -// real_t i_ {static_cast(static_cast(i) - N_GHOSTS)}; -// real_t j_ {static_cast(static_cast(j) - N_GHOSTS)}; - -// // i -// vec_t rth_; -// mblock.metric.x_Code2Sph({i_, j_}, rth_); -// if (rth_[0] > r_absorb) { -// real_t delta_r1 {(rth_[0] - r_absorb) / 
(r_max - r_absorb)}; -// real_t sigma_r1 {absorb_norm * (ONE - math::exp(absorb_coeff * HEAVISIDE(delta_r1) * -// CUBE(delta_r1)))}; -// // !HACK -// // real_t br_target = pGen.userTargetField_br_cntrv(mblock, {i_, j_ + HALF}); -// real_t br_target = br_func(mblock, {i_, j_ + HALF}); -// // real_t br_target {ZERO}; -// mblock.em0(i, j, em::bx1) = (ONE - sigma_r1) * mblock.em0(i, j, em::bx1) + sigma_r1 -// * br_target; mblock.em(i, j, em::bx1) = (ONE - sigma_r1) * mblock.em(i, j, em::bx1) -// + sigma_r1 * br_target; -// } -// // i + 1/2 -// mblock.metric.x_Code2Sph({i_ + HALF, j_}, rth_); -// if (rth_[0] > r_absorb) { -// real_t delta_r2 {(rth_[0] - r_absorb) / (r_max - r_absorb)}; -// real_t sigma_r2 {absorb_norm * (ONE - math::exp(absorb_coeff * HEAVISIDE(delta_r2) * -// CUBE(delta_r2)))}; -// // !HACK -// // real_t bth_target {pGen->userTargetField_bth_cntrv(mblock, {i_ + HALF, j_})}; -// real_t bth_target {ZERO}; -// mblock.em0(i, j, em::bx2) = (ONE - sigma_r2) * mblock.em0(i, j, em::bx2) + sigma_r2 -// * bth_target; mblock.em(i, j, em::bx2) = (ONE - sigma_r2) * mblock.em(i, j, -// em::bx2) + sigma_r2 * bth_target; mblock.em0(i, j, em::bx3) = (ONE - sigma_r2) * -// mblock.em0(i, j, em::bx3); mblock.em(i, j, em::bx3) = (ONE - sigma_r2) * -// mblock.em(i, j, em::bx3); -// } -// }); diff --git a/legacy/src/nttiny.cpp b/legacy/src/nttiny.cpp deleted file mode 100644 index 6820900f0..000000000 --- a/legacy/src/nttiny.cpp +++ /dev/null @@ -1,124 +0,0 @@ - -#elif defined(GRPIC_ENGINE) - // interpolate and transform to spherical - // !TODO: mirrors for em0, aux etc - ntt::vec_t Dsph {ZERO}, Bsph {ZERO}, D0sph {ZERO}, B0sph {ZERO}; - if ((i >= 0) && (i < sx1) && (j >= 0) && (j < sx2)) { - if (m_fields_to_plot[f].at(0) == 'D') { - if (m_fields_to_plot[f].at(1) == '0') { - real_t Dx1, Dx2, Dx3; - // interpolate to cell center - Dx1 = 0.5 - * (m_sim.meshblock.em0(I, J, ntt::em::ex1) - + m_sim.meshblock.em0(I, J + 1, ntt::em::ex1)); - Dx2 = 0.5 - * (m_sim.meshblock.em0(I, J, 
ntt::em::ex2) - + m_sim.meshblock.em0(I + 1, J, ntt::em::ex2)); - Dx3 = 0.25 - * (m_sim.meshblock.em0(I, J, ntt::em::ex3) - + m_sim.meshblock.em0(I + 1, J, ntt::em::ex3) - + m_sim.meshblock.em0(I, J + 1, ntt::em::ex3) - + m_sim.meshblock.em0(I + 1, J + 1, ntt::em::ex3)); - m_sim.meshblock.metric.v_Cntr2SphCntrv( - {i_ + HALF, j_ + HALF}, {Dx1, Dx2, Dx3}, D0sph); - } else { - real_t Dx1, Dx2, Dx3; - // interpolate to cell center - Dx1 = 0.5 - * (m_sim.meshblock.em(I, J, ntt::em::ex1) - + m_sim.meshblock.em(I, J + 1, ntt::em::ex1)); - Dx2 = 0.5 - * (m_sim.meshblock.em(I, J, ntt::em::ex2) - + m_sim.meshblock.em(I + 1, J, ntt::em::ex2)); - Dx3 = 0.25 - * (m_sim.meshblock.em(I, J, ntt::em::ex3) - + m_sim.meshblock.em(I + 1, J, ntt::em::ex3) - + m_sim.meshblock.em(I, J + 1, ntt::em::ex3) - + m_sim.meshblock.em(I + 1, J + 1, ntt::em::ex3)); - m_sim.meshblock.metric.v_Cntr2SphCntrv( - {i_ + HALF, j_ + HALF}, {Dx1, Dx2, Dx3}, Dsph); - } - } else if (m_fields_to_plot[f].at(0) == 'B') { - if (m_fields_to_plot[f].at(1) == '0') { - real_t Bx1, Bx2, Bx3; - // interpolate to cell center - Bx1 = 0.5 - * (m_sim.meshblock.em0(I + 1, J, ntt::em::bx1) - + m_sim.meshblock.em0(I, J, ntt::em::bx1)); - Bx2 = 0.5 - * (m_sim.meshblock.em0(I, J + 1, ntt::em::bx2) - + m_sim.meshblock.em0(I, J, ntt::em::bx2)); - Bx3 = m_sim.meshblock.em0(I, J, ntt::em::bx3); - m_sim.meshblock.metric.v_Cntr2SphCntrv( - {i_ + HALF, j_ + HALF}, {Bx1, Bx2, Bx3}, B0sph); - } else { - real_t Bx1, Bx2, Bx3; - // interpolate to cell center - Bx1 = 0.5 - * (m_sim.meshblock.em(I + 1, J, ntt::em::bx1) - + m_sim.meshblock.em(I, J, ntt::em::bx1)); - Bx2 = 0.5 - * (m_sim.meshblock.em(I, J + 1, ntt::em::bx2) - + m_sim.meshblock.em(I, J, ntt::em::bx2)); - Bx3 = m_sim.meshblock.em(I, J, ntt::em::bx3); - m_sim.meshblock.metric.v_Cntr2SphCntrv( - {i_ + HALF, j_ + HALF}, {Bx1, Bx2, Bx3}, Bsph); - } - } - } else { - Dsph[0] = m_sim.meshblock.em(I, J, ntt::em::ex1); - Dsph[1] = m_sim.meshblock.em(I, J, ntt::em::ex2); - Dsph[2] = 
m_sim.meshblock.em(I, J, ntt::em::ex3); - Bsph[0] = m_sim.meshblock.em(I, J, ntt::em::bx1); - Bsph[1] = m_sim.meshblock.em(I, J, ntt::em::bx2); - Bsph[2] = m_sim.meshblock.em(I, J, ntt::em::bx3); - D0sph[0] = m_sim.meshblock.em0(I, J, ntt::em::ex1); - D0sph[1] = m_sim.meshblock.em0(I, J, ntt::em::ex2); - D0sph[2] = m_sim.meshblock.em0(I, J, ntt::em::ex3); - B0sph[0] = m_sim.meshblock.em0(I, J, ntt::em::bx1); - B0sph[1] = m_sim.meshblock.em0(I, J, ntt::em::bx2); - B0sph[2] = m_sim.meshblock.em0(I, J, ntt::em::bx3); - } - real_t val {ZERO}; - if (m_fields_to_plot[f] == "Dr") { - val = Dsph[0]; - } else if (m_fields_to_plot[f] == "Dtheta") { - val = Dsph[1]; - } else if (m_fields_to_plot[f] == "Dphi") { - val = Dsph[2]; - } else if (m_fields_to_plot[f] == "Br") { - val = Bsph[0]; - } else if (m_fields_to_plot[f] == "Btheta") { - val = Bsph[1]; - } else if (m_fields_to_plot[f] == "Bphi") { - val = Bsph[2]; - } else if (m_fields_to_plot[f] == "Er") { - val = m_sim.meshblock.aux(I, J, ntt::em::ex1); - } else if (m_fields_to_plot[f] == "Etheta") { - val = m_sim.meshblock.aux(I, J, ntt::em::ex2); - } else if (m_fields_to_plot[f] == "Ephi") { - val = m_sim.meshblock.aux(I, J, ntt::em::ex3); - } else if (m_fields_to_plot[f] == "Hr") { - val = m_sim.meshblock.aux(I, J, ntt::em::bx1); - } else if (m_fields_to_plot[f] == "Htheta") { - val = m_sim.meshblock.aux(I, J, ntt::em::bx2); - } else if (m_fields_to_plot[f] == "Hphi") { - val = m_sim.meshblock.aux(I, J, ntt::em::bx3); - } else if (m_fields_to_plot[f] == "D0r") { - val = D0sph[0]; - } else if (m_fields_to_plot[f] == "D0theta") { - val = D0sph[1]; - } else if (m_fields_to_plot[f] == "D0phi") { - val = D0sph[2]; - } else if (m_fields_to_plot[f] == "B0r") { - val = B0sph[0]; - } else if (m_fields_to_plot[f] == "B0theta") { - val = B0sph[1]; - } else if (m_fields_to_plot[f] == "B0phi") { - val = B0sph[2]; - } else if (m_fields_to_plot[f] == "Aphi") { - val = m_sim.meshblock.aphi(I, J, 0); - } - auto idx = Index(i, j); - 
(this->fields)[m_fields_to_plot[f]][idx] = val; -#endif diff --git a/legacy/src/particle_pusher_sr.hpp b/legacy/src/particle_pusher_sr.hpp deleted file mode 100644 index 637fcbc60..000000000 --- a/legacy/src/particle_pusher_sr.hpp +++ /dev/null @@ -1,966 +0,0 @@ -/** - * @file kernels/particle_pusher_sr.h - * @brief Particle pusher for the SR - * @implements - * - kernel::sr::Pusher_kernel<> - * - kernel::sr::PusherBase_kernel<> - * @depends: - * - enums.h - * - global.h - * - arch/kokkos_aliases.h - * - arch/traits.h - * - utils/error.h - * - utils/numeric.h - * @namespaces: - * - kernel::sr:: - */ - -#ifndef KERNELS_PARTICLE_PUSHER_SR_HPP -#define KERNELS_PARTICLE_PUSHER_SR_HPP - -#include "enums.h" -#include "global.h" - -#include "arch/kokkos_aliases.h" -#include "arch/traits.h" -#include "utils/error.h" -#include "utils/numeric.h" - -#include -#include - -/* -------------------------------------------------------------------------- */ -/* Local macros */ -/* -------------------------------------------------------------------------- */ -#define from_Xi_to_i(XI, I) \ - { I = static_cast((XI)); } - -#define from_Xi_to_i_di(XI, I, DI) \ - { \ - from_Xi_to_i((XI), (I)); \ - DI = static_cast((XI)) - static_cast(I); \ - } - -#define i_di_to_Xi(I, DI) static_cast((I)) + static_cast((DI)) - -/* -------------------------------------------------------------------------- */ - -namespace kernel::sr { - using namespace ntt; - - // Pushers - struct Boris_t {}; - - struct Vay_t {}; - - struct Photon_t {}; - - struct Extforce_t {}; - - struct GCA_t {}; - - // Cooling - struct NoCooling_t {}; - - struct Synchrotron_t {}; - - template - struct Union_t {}; - - template - struct is_contained; - - template - struct is_contained : is_contained {}; - - template - struct is_contained : std::true_type {}; - - template - struct is_contained : std::false_type {}; - - struct Massive_t {}; - - struct Massless_t {}; - - /** - * @tparam M Metric - * @tparam PG Problem generator - * @tparam P 
Particle pusher - * @tparam Cs Cooling algorithms - */ - template - struct Pusher_kernel { - static_assert(M::is_metric, "M must be a metric class"); - // static_assert(PG::is_pgen, "PG must be a problem generator class"); - static constexpr auto D = M::Dim; - // using base_t::defines_fx1; - // using base_t::defines_fx2; - // using base_t::defines_fx3; - - private: - const ndfield_t EB; - const unsigned short sp; - array_t i1, i2, i3; - array_t i1_prev, i2_prev, i3_prev; - array_t dx1, dx2, dx3; - array_t dx1_prev, dx2_prev, dx3_prev; - array_t ux1, ux2, ux3; - array_t phi; - array_t tag; - const M metric; - // const PG pgen; - - const real_t time, coeff, dt; - const int ni1, ni2, ni3; - bool is_absorb_i1min { false }, is_absorb_i1max { false }; - bool is_absorb_i2min { false }, is_absorb_i2max { false }; - bool is_absorb_i3min { false }, is_absorb_i3max { false }; - bool is_periodic_i1min { false }, is_periodic_i1max { false }; - bool is_periodic_i2min { false }, is_periodic_i2max { false }; - bool is_periodic_i3min { false }, is_periodic_i3max { false }; - bool is_axis_i2min { false }, is_axis_i2max { false }; - // gca parameters - const real_t gca_larmor, gca_EovrB_sqr; - // synchrotron cooling parameters - const real_t coeff_sync; - - public: - Pusher_kernel(const ndfield_t& EB, - unsigned short sp, - array_t& i1, - array_t& i2, - array_t& i3, - array_t& i1_prev, - array_t& i2_prev, - array_t& i3_prev, - array_t& dx1, - array_t& dx2, - array_t& dx3, - array_t& dx1_prev, - array_t& dx2_prev, - array_t& dx3_prev, - array_t& ux1, - array_t& ux2, - array_t& ux3, - array_t& phi, - array_t& tag, - const M& metric, - // const PG& pgen, - real_t time, - real_t coeff, - real_t dt, - int ni1, - int ni2, - int ni3, - const boundaries_t& boundaries, - real_t gca_larmor_max, - real_t gca_eovrb_max, - real_t coeff_sync) : - EB { EB }, - sp { sp }, - i1 { i1 }, - i2 { i2 }, - i3 { i3 }, - i1_prev { i1_prev }, - i2_prev { i2_prev }, - i3_prev { i3_prev }, - dx1 { dx1 }, - dx2 
{ dx2 }, - dx3 { dx3 }, - dx1_prev { dx1_prev }, - dx2_prev { dx2_prev }, - dx3_prev { dx3_prev }, - ux1 { ux1 }, - ux2 { ux2 }, - ux3 { ux3 }, - phi { phi }, - tag { tag }, - metric { metric }, - // pgen { pgen }, - time { time }, - coeff { coeff }, - dt { dt }, - ni1 { ni1 }, - ni2 { ni2 }, - ni3 { ni3 }, - gca_larmor { gca_larmor_max }, - gca_EovrB_sqr { SQR(gca_eovrb_max) }, - coeff_sync { coeff_sync } { - - raise::ErrorIf(boundaries.size() < 1, "boundaries defined incorrectly", HERE); - is_absorb_i1min = (boundaries[0].first == PrtlBC::ATMOSPHERE) || - (boundaries[0].first == PrtlBC::ABSORB); - is_absorb_i1max = (boundaries[0].second == PrtlBC::ATMOSPHERE) || - (boundaries[0].second == PrtlBC::ABSORB); - is_periodic_i1min = (boundaries[0].first == PrtlBC::PERIODIC); - is_periodic_i1max = (boundaries[0].second == PrtlBC::PERIODIC); - if constexpr ((D == Dim::_2D) || (D == Dim::_3D)) { - raise::ErrorIf(boundaries.size() < 2, "boundaries defined incorrectly", HERE); - is_absorb_i2min = (boundaries[1].first == PrtlBC::ATMOSPHERE) || - (boundaries[1].first == PrtlBC::ABSORB); - is_absorb_i2max = (boundaries[1].second == PrtlBC::ATMOSPHERE) || - (boundaries[1].second == PrtlBC::ABSORB); - is_periodic_i2min = (boundaries[1].first == PrtlBC::PERIODIC); - is_periodic_i2max = (boundaries[1].second == PrtlBC::PERIODIC); - is_axis_i2min = (boundaries[1].first == PrtlBC::AXIS); - is_axis_i2max = (boundaries[1].second == PrtlBC::AXIS); - } - if constexpr (D == Dim::_3D) { - raise::ErrorIf(boundaries.size() < 3, "boundaries defined incorrectly", HERE); - is_absorb_i3min = (boundaries[2].first == PrtlBC::ATMOSPHERE) || - (boundaries[2].first == PrtlBC::ABSORB); - is_absorb_i3max = (boundaries[2].second == PrtlBC::ATMOSPHERE) || - (boundaries[2].second == PrtlBC::ABSORB); - is_periodic_i3min = (boundaries[2].first == PrtlBC::PERIODIC); - is_periodic_i3max = (boundaries[2].second == PrtlBC::PERIODIC); - } - } - - Inline void synchrotronDrag(index_t& p, - vec_t& u_prime, - const 
vec_t& e0, - const vec_t& b0) const { - real_t gamma_prime_sqr = ONE / math::sqrt(ONE + NORM_SQR(u_prime[0], - u_prime[1], - u_prime[2])); - u_prime[0] *= gamma_prime_sqr; - u_prime[1] *= gamma_prime_sqr; - u_prime[2] *= gamma_prime_sqr; - gamma_prime_sqr = SQR(ONE / gamma_prime_sqr); - const real_t beta_dot_e { - DOT(u_prime[0], u_prime[1], u_prime[2], e0[0], e0[1], e0[2]) - }; - vec_t e_plus_beta_cross_b { - e0[0] + CROSS_x1(u_prime[0], u_prime[1], u_prime[2], b0[0], b0[1], b0[2]), - e0[1] + CROSS_x2(u_prime[0], u_prime[1], u_prime[2], b0[0], b0[1], b0[2]), - e0[2] + CROSS_x3(u_prime[0], u_prime[1], u_prime[2], b0[0], b0[1], b0[2]) - }; - vec_t kappaR { - CROSS_x1(e_plus_beta_cross_b[0], - e_plus_beta_cross_b[1], - e_plus_beta_cross_b[2], - b0[0], - b0[1], - b0[2]) + - beta_dot_e * e0[0], - CROSS_x2(e_plus_beta_cross_b[0], - e_plus_beta_cross_b[1], - e_plus_beta_cross_b[2], - b0[0], - b0[1], - b0[2]) + - beta_dot_e * e0[1], - CROSS_x3(e_plus_beta_cross_b[0], - e_plus_beta_cross_b[1], - e_plus_beta_cross_b[2], - b0[0], - b0[1], - b0[2]) + - beta_dot_e * e0[2], - }; - const real_t chiR_sqr { NORM_SQR(e_plus_beta_cross_b[0], - e_plus_beta_cross_b[1], - e_plus_beta_cross_b[2]) - - SQR(beta_dot_e) }; - ux1(p) += coeff_sync * (kappaR[0] - gamma_prime_sqr * u_prime[0] * chiR_sqr); - ux2(p) += coeff_sync * (kappaR[1] - gamma_prime_sqr * u_prime[1] * chiR_sqr); - ux3(p) += coeff_sync * (kappaR[2] - gamma_prime_sqr * u_prime[2] * chiR_sqr); - } - - /* photon --------------------------------------------------------------- */ - Inline void operator()(Photon_t, index_t p) const { - if (tag(p) != ParticleTag::alive) { - return; - } - coord_t xp_Cd { ZERO }; - getPrtlPos(p, xp_Cd); - posUpd(p, xp_Cd); - } - - // /* Vay/Boris, no gca, no ext force, no cooling -------------------------- */ - // template - // Inline void operator()(P, index_t p) const { - // if (tag(p) != ParticleTag::alive) { - // return; - // } - // coord_t xp_Cd { ZERO }; - // getPrtlPos(p, xp_Cd); - // vec_t 
ei { ZERO }, bi { ZERO }; - // vec_t ei_Cart { ZERO }, bi_Cart { ZERO }; - - // getInterpFlds(p, ei, bi); - // metric.template transform_xyz(xp_Cd, ei, ei_Cart); - // metric.template transform_xyz(xp_Cd, bi, bi_Cart); - // velUpd(P {}, p, ei_Cart, bi_Cart); - // posUpd(p, xp_Cd); - // } - - /* general case --------------------------------------------------------- */ - template - Inline void operator()(Union_t, index_t p) const { - if (tag(p) != ParticleTag::alive) { - return; - } - coord_t xp_Cd { ZERO }; - getPrtlPos(p, xp_Cd); - // update cartesian velocity - vec_t ei { ZERO }, bi { ZERO }; - vec_t ei_Cart { ZERO }, bi_Cart { ZERO }; - vec_t force_Cart { ZERO }; - vec_t u_prime { ZERO }; - vec_t ei_Cart_rad { ZERO }, bi_Cart_rad { ZERO }; - bool is_gca { false }; - - getInterpFlds(p, ei, bi); - metric.template transform_xyz(xp_Cd, ei, ei_Cart); - metric.template transform_xyz(xp_Cd, bi, bi_Cart); - if constexpr (is_contained>::value) { - // backup fields & velocities to use later in cooling - ei_Cart_rad[0] = ei_Cart[0]; - ei_Cart_rad[1] = ei_Cart[1]; - ei_Cart_rad[2] = ei_Cart[2]; - bi_Cart_rad[0] = bi_Cart[0]; - bi_Cart_rad[1] = bi_Cart[1]; - bi_Cart_rad[2] = bi_Cart[2]; - u_prime[0] = ux1(p); - u_prime[1] = ux2(p); - u_prime[2] = ux3(p); - } - if constexpr (is_contained>::value) { - // coord_t xp_Ph { ZERO }; - // xp_Ph[0] = metric.template convert<1, Crd::Cd, Crd::Ph>(xp_Cd[0]); - // if constexpr (M::PrtlDim != Dim::_1D) { - // xp_Ph[1] = metric.template convert<2, Crd::Cd, Crd::Ph>(xp_Cd[1]); - // } - // if constexpr (M::PrtlDim == Dim::_3D) { - // xp_Ph[2] = metric.template convert<3, Crd::Cd, Crd::Ph>(xp_Cd[2]); - // } - // real_t fx1 { ZERO }, fx2 { ZERO }, fx3 { ZERO }; - // metric.template transform_xyz( - // xp_Cd, - // { pgen.ext_force.fx1(sp, time, xp_Ph), - // pgen.ext_force.fx2(sp, time, xp_Ph), - // pgen.ext_force.fx3(sp, time, xp_Ph) }, - // force_Cart); - } - if constexpr (is_contained>::value) { - /* hybrid GCA/conventional mode 
--------------------------------- */ - const auto E2 { NORM_SQR(ei_Cart[0], ei_Cart[1], ei_Cart[2]) }; - const auto B2 { NORM_SQR(bi_Cart[0], bi_Cart[1], bi_Cart[2]) }; - const auto rL { math::sqrt(ONE + NORM_SQR(ux1(p), ux2(p), ux3(p))) * - dt / (TWO * math::abs(coeff) * math::sqrt(B2)) }; - if (B2 > ZERO && rL < gca_larmor && (E2 / B2) < gca_EovrB_sqr) { - is_gca = true; - // update with GCA - if constexpr (is_contained>::value) { - velUpd(Union_t {}, p, force_Cart, ei_Cart, bi_Cart); - } else { - velUpd(GCA_t {}, p, ei_Cart, bi_Cart); - } - } else { - // update with conventional pusher - if constexpr (is_contained>::value) { - ux1(p) += HALF * dt * force_Cart[0]; - ux2(p) += HALF * dt * force_Cart[1]; - ux3(p) += HALF * dt * force_Cart[2]; - } - velUpd(P {}, p, ei_Cart, bi_Cart); - if constexpr (is_contained>::value) { - ux1(p) += HALF * dt * force_Cart[0]; - ux2(p) += HALF * dt * force_Cart[1]; - ux3(p) += HALF * dt * force_Cart[2]; - } - } - } else { - /* conventional pusher mode ------------------------------------- */ - // update with conventional pusher - if constexpr (is_contained>::value) { - ux1(p) += HALF * dt * force_Cart[0]; - ux2(p) += HALF * dt * force_Cart[1]; - ux3(p) += HALF * dt * force_Cart[2]; - } - velUpd(P {}, p, ei_Cart, bi_Cart); - if constexpr (is_contained>::value) { - ux1(p) += HALF * dt * force_Cart[0]; - ux2(p) += HALF * dt * force_Cart[1]; - ux3(p) += HALF * dt * force_Cart[2]; - } - } - // cooling - if constexpr (is_contained>::value) { - if (!is_gca) { - u_prime[0] = HALF * (u_prime[0] + ux1(p)); - u_prime[1] = HALF * (u_prime[1] + ux2(p)); - u_prime[2] = HALF * (u_prime[2] + ux3(p)); - synchrotronDrag(p, u_prime, ei_Cart_rad, bi_Cart_rad); - } - } - // update position - posUpd(p, xp_Cd); - } - - template - Inline void posUpd(index_t& p, coord_t& xp) const { - - // get cartesian velocity - const real_t inv_energy { ONE / getEnergy(T {}, p) }; - vec_t vp_Cart { ux1(p) * inv_energy, - ux2(p) * inv_energy, - ux3(p) * inv_energy }; - 
// get cartesian position - coord_t xp_Cart { ZERO }; - metric.template convert_xyz(xp, xp_Cart); - // update cartesian position - for (auto d = 0u; d < M::PrtlDim; ++d) { - xp_Cart[d] += vp_Cart[d] * dt; - } - // transform back to code - metric.template convert_xyz(xp_Cart, xp); - - // update x1 - if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { - i1_prev(p) = i1(p); - dx1_prev(p) = dx1(p); - from_Xi_to_i_di(xp[0], i1(p), dx1(p)); - } - - // update x2 & phi - if constexpr (D == Dim::_2D || D == Dim::_3D) { - i2_prev(p) = i2(p); - dx2_prev(p) = dx2(p); - from_Xi_to_i_di(xp[1], i2(p), dx2(p)); - if constexpr (D == Dim::_2D && M::PrtlDim == Dim::_3D) { - phi(p) = xp[2]; - } - } - - // update x3 - if constexpr (D == Dim::_3D) { - i3_prev(p) = i3(p); - dx3_prev(p) = dx3(p); - from_Xi_to_i_di(xp[2], i3(p), dx3(p)); - } - boundaryConditions(p); - } - - /** - * @brief update particle velocities - * @param P pusher algorithm - * @param p, e0, b0 index & interpolated fields - */ - Inline void velUpd(Boris_t, - index_t& p, - vec_t& e0, - vec_t& b0) const { - - real_t COEFF { coeff }; - - e0[0] *= COEFF; - e0[1] *= COEFF; - e0[2] *= COEFF; - vec_t u0 { ux1(p) + e0[0], ux2(p) + e0[1], ux3(p) + e0[2] }; - - COEFF *= ONE / math::sqrt(ONE + NORM_SQR(u0[0], u0[1], u0[2])); - b0[0] *= COEFF; - b0[1] *= COEFF; - b0[2] *= COEFF; - COEFF = TWO / (ONE + NORM_SQR(b0[0], b0[1], b0[2])); - - vec_t u1 { - (u0[0] + CROSS_x1(u0[0], u0[1], u0[2], b0[0], b0[1], b0[2])) * COEFF, - (u0[1] + CROSS_x2(u0[0], u0[1], u0[2], b0[0], b0[1], b0[2])) * COEFF, - (u0[2] + CROSS_x3(u0[0], u0[1], u0[2], b0[0], b0[1], b0[2])) * COEFF - }; - - u0[0] += CROSS_x1(u1[0], u1[1], u1[2], b0[0], b0[1], b0[2]) + e0[0]; - u0[1] += CROSS_x2(u1[0], u1[1], u1[2], b0[0], b0[1], b0[2]) + e0[1]; - u0[2] += CROSS_x3(u1[0], u1[1], u1[2], b0[0], b0[1], b0[2]) + e0[2]; - - ux1(p) = u0[0]; - ux2(p) = u0[1]; - ux3(p) = u0[2]; - } - - Inline void velUpd(Vay_t, index_t& p, vec_t& e0, vec_t& b0) const { - auto COEFF { 
coeff }; - e0[0] *= COEFF; - e0[1] *= COEFF; - e0[2] *= COEFF; - - b0[0] *= COEFF; - b0[1] *= COEFF; - b0[2] *= COEFF; - - COEFF = ONE / math::sqrt(ONE + NORM_SQR(ux1(p), ux2(p), ux3(p))); - - vec_t u1 { - (ux1(p) + TWO * e0[0] + - CROSS_x1(ux1(p), ux2(p), ux3(p), b0[0], b0[1], b0[2]) * COEFF), - (ux2(p) + TWO * e0[1] + - CROSS_x2(ux1(p), ux2(p), ux3(p), b0[0], b0[1], b0[2]) * COEFF), - (ux3(p) + TWO * e0[2] + - CROSS_x3(ux1(p), ux2(p), ux3(p), b0[0], b0[1], b0[2]) * COEFF) - }; - COEFF = DOT(u1[0], u1[1], u1[2], b0[0], b0[1], b0[2]); - auto COEFF2 { ONE + NORM_SQR(u1[0], u1[1], u1[2]) - - NORM_SQR(b0[0], b0[1], b0[2]) }; - - COEFF = ONE / - math::sqrt( - INV_2 * (COEFF2 + math::sqrt(SQR(COEFF2) + - FOUR * (SQR(b0[0]) + SQR(b0[1]) + - SQR(b0[2]) + SQR(COEFF))))); - COEFF2 = ONE / (ONE + SQR(b0[0] * COEFF) + SQR(b0[1] * COEFF) + - SQR(b0[2] * COEFF)); - - ux1(p) = COEFF2 * (u1[0] + - COEFF * DOT(u1[0], u1[1], u1[2], b0[0], b0[1], b0[2]) * - (b0[0] * COEFF) + - u1[1] * b0[2] * COEFF - u1[2] * b0[1] * COEFF); - ux2(p) = COEFF2 * (u1[1] + - COEFF * DOT(u1[0], u1[1], u1[2], b0[0], b0[1], b0[2]) * - (b0[1] * COEFF) + - u1[2] * b0[0] * COEFF - u1[0] * b0[2] * COEFF); - ux3(p) = COEFF2 * (u1[2] + - COEFF * DOT(u1[0], u1[1], u1[2], b0[0], b0[1], b0[2]) * - (b0[2] * COEFF) + - u1[0] * b0[1] * COEFF - u1[1] * b0[0] * COEFF); - } - - Inline void velUpd(GCA_t, index_t& p, vec_t& e0, vec_t& b0) const { - const auto eb_sqr { NORM_SQR(e0[0], e0[1], e0[2]) + - NORM_SQR(b0[0], b0[1], b0[2]) }; - - const vec_t wE { - CROSS_x1(e0[0], e0[1], e0[2], b0[0], b0[1], b0[2]) / eb_sqr, - CROSS_x2(e0[0], e0[1], e0[2], b0[0], b0[1], b0[2]) / eb_sqr, - CROSS_x3(e0[0], e0[1], e0[2], b0[0], b0[1], b0[2]) / eb_sqr - }; - - { - const auto b_norm_inv { ONE / NORM(b0[0], b0[1], b0[2]) }; - b0[0] *= b_norm_inv; - b0[1] *= b_norm_inv; - b0[2] *= b_norm_inv; - } - auto upar { DOT(ux1(p), ux2(p), ux3(p), b0[0], b0[1], b0[2]) + - coeff * TWO * DOT(e0[0], e0[1], e0[2], b0[0], b0[1], b0[2]) }; - - real_t 
factor; - { - const auto wE_sqr { NORM_SQR(wE[0], wE[1], wE[2]) }; - if (wE_sqr < static_cast(0.01)) { - factor = ONE + wE_sqr + TWO * SQR(wE_sqr) + FIVE * SQR(wE_sqr) * wE_sqr; - } else { - factor = (ONE - math::sqrt(ONE - FOUR * wE_sqr)) / (TWO * wE_sqr); - } - } - const vec_t vE_Cart { wE[0] * factor, wE[1] * factor, wE[2] * factor }; - const auto Gamma { math::sqrt(ONE + SQR(upar)) / - math::sqrt( - ONE - NORM_SQR(vE_Cart[0], vE_Cart[1], vE_Cart[2])) }; - ux1(p) = upar * b0[0] + vE_Cart[0] * Gamma; - ux2(p) = upar * b0[1] + vE_Cart[1] * Gamma; - ux3(p) = upar * b0[2] + vE_Cart[2] * Gamma; - } - - Inline void velUpd(Union_t, - index_t& p, - vec_t& f0, - vec_t& e0, - vec_t& b0) const { - const auto eb_sqr { NORM_SQR(e0[0], e0[1], e0[2]) + - NORM_SQR(b0[0], b0[1], b0[2]) }; - - const vec_t wE { - CROSS_x1(e0[0], e0[1], e0[2], b0[0], b0[1], b0[2]) / eb_sqr, - CROSS_x2(e0[0], e0[1], e0[2], b0[0], b0[1], b0[2]) / eb_sqr, - CROSS_x3(e0[0], e0[1], e0[2], b0[0], b0[1], b0[2]) / eb_sqr - }; - - { - const auto b_norm_inv { ONE / NORM(b0[0], b0[1], b0[2]) }; - b0[0] *= b_norm_inv; - b0[1] *= b_norm_inv; - b0[2] *= b_norm_inv; - } - auto upar { DOT(ux1(p), ux2(p), ux3(p), b0[0], b0[1], b0[2]) + - coeff * TWO * DOT(e0[0], e0[1], e0[2], b0[0], b0[1], b0[2]) + - dt * DOT(f0[0], f0[1], f0[2], b0[0], b0[1], b0[2]) }; - - real_t factor; - { - const auto wE_sqr { NORM_SQR(wE[0], wE[1], wE[2]) }; - if (wE_sqr < static_cast(0.01)) { - factor = ONE + wE_sqr + TWO * SQR(wE_sqr) + FIVE * SQR(wE_sqr) * wE_sqr; - } else { - factor = (ONE - math::sqrt(ONE - FOUR * wE_sqr)) / (TWO * wE_sqr); - } - } - const vec_t vE_Cart { wE[0] * factor, wE[1] * factor, wE[2] * factor }; - const auto Gamma { math::sqrt(ONE + SQR(upar)) / - math::sqrt( - ONE - NORM_SQR(vE_Cart[0], vE_Cart[1], vE_Cart[2])) }; - ux1(p) = upar * b0[0] + vE_Cart[0] * Gamma; - ux2(p) = upar * b0[1] + vE_Cart[1] * Gamma; - ux3(p) = upar * b0[2] + vE_Cart[2] * Gamma; - } - - // Getters - Inline void getPrtlPos(index_t& p, 
coord_t& xp) const { - if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { - xp[0] = i_di_to_Xi(i1(p), dx1(p)); - } - if constexpr (D == Dim::_2D) { - xp[1] = i_di_to_Xi(i2(p), dx2(p)); - if constexpr (M::PrtlDim == Dim::_3D) { - xp[2] = phi(p); - } - } - if constexpr (D == Dim::_3D) { - xp[1] = i_di_to_Xi(i2(p), dx2(p)); - xp[2] = i_di_to_Xi(i3(p), dx3(p)); - } - } - - Inline auto getEnergy(Massive_t, index_t& p) const -> real_t { - return math::sqrt(ONE + SQR(ux1(p)) + SQR(ux2(p)) + SQR(ux3(p))); - } - - Inline auto getEnergy(Massless_t, index_t& p) const -> real_t { - return math::sqrt(SQR(ux1(p)) + SQR(ux2(p)) + SQR(ux3(p))); - } - - Inline void getInterpFlds(index_t& p, - vec_t& e0, - vec_t& b0) const { - if constexpr (D == Dim::_1D) { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - - // first order - real_t c0, c1; - - // Ex1 - // interpolate to nodes - c0 = HALF * (EB(i, em::ex1) + EB(i - 1, em::ex1)); - c1 = HALF * (EB(i, em::ex1) + EB(i + 1, em::ex1)); - // interpolate from nodes to the particle position - e0[0] = c0 * (ONE - dx1_) + c1 * dx1_; - // Ex2 - c0 = EB(i, em::ex2); - c1 = EB(i + 1, em::ex2); - e0[1] = c0 * (ONE - dx1_) + c1 * dx1_; - // Ex3 - c0 = EB(i, em::ex3); - c1 = EB(i + 1, em::ex3); - e0[2] = c0 * (ONE - dx1_) + c1 * dx1_; - - // Bx1 - c0 = EB(i, em::bx1); - c1 = EB(i + 1, em::bx1); - b0[0] = c0 * (ONE - dx1_) + c1 * dx1_; - // Bx2 - c0 = HALF * (EB(i - 1, em::bx2) + EB(i, em::bx2)); - c1 = HALF * (EB(i, em::bx2) + EB(i + 1, em::bx2)); - b0[1] = c0 * (ONE - dx1_) + c1 * dx1_; - // Bx3 - c0 = HALF * (EB(i - 1, em::bx3) + EB(i, em::bx3)); - c1 = HALF * (EB(i, em::bx3) + EB(i + 1, em::bx3)); - b0[2] = c0 * (ONE - dx1_) + c1 * dx1_; - } else if constexpr (D == Dim::_2D) { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const int j { i2(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - const auto dx2_ { static_cast(dx2(p)) }; - - // first order - real_t 
c000, c100, c010, c110, c00, c10; - - // Ex1 - // interpolate to nodes - c000 = HALF * (EB(i, j, em::ex1) + EB(i - 1, j, em::ex1)); - c100 = HALF * (EB(i, j, em::ex1) + EB(i + 1, j, em::ex1)); - c010 = HALF * (EB(i, j + 1, em::ex1) + EB(i - 1, j + 1, em::ex1)); - c110 = HALF * (EB(i, j + 1, em::ex1) + EB(i + 1, j + 1, em::ex1)); - // interpolate from nodes to the particle position - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - e0[0] = c00 * (ONE - dx2_) + c10 * dx2_; - // Ex2 - c000 = HALF * (EB(i, j, em::ex2) + EB(i, j - 1, em::ex2)); - c100 = HALF * (EB(i + 1, j, em::ex2) + EB(i + 1, j - 1, em::ex2)); - c010 = HALF * (EB(i, j, em::ex2) + EB(i, j + 1, em::ex2)); - c110 = HALF * (EB(i + 1, j, em::ex2) + EB(i + 1, j + 1, em::ex2)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - e0[1] = c00 * (ONE - dx2_) + c10 * dx2_; - // Ex3 - c000 = EB(i, j, em::ex3); - c100 = EB(i + 1, j, em::ex3); - c010 = EB(i, j + 1, em::ex3); - c110 = EB(i + 1, j + 1, em::ex3); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - e0[2] = c00 * (ONE - dx2_) + c10 * dx2_; - - // Bx1 - c000 = HALF * (EB(i, j, em::bx1) + EB(i, j - 1, em::bx1)); - c100 = HALF * (EB(i + 1, j, em::bx1) + EB(i + 1, j - 1, em::bx1)); - c010 = HALF * (EB(i, j, em::bx1) + EB(i, j + 1, em::bx1)); - c110 = HALF * (EB(i + 1, j, em::bx1) + EB(i + 1, j + 1, em::bx1)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - b0[0] = c00 * (ONE - dx2_) + c10 * dx2_; - // Bx2 - c000 = HALF * (EB(i - 1, j, em::bx2) + EB(i, j, em::bx2)); - c100 = HALF * (EB(i, j, em::bx2) + EB(i + 1, j, em::bx2)); - c010 = HALF * (EB(i - 1, j + 1, em::bx2) + EB(i, j + 1, em::bx2)); - c110 = HALF * (EB(i, j + 1, em::bx2) + EB(i + 1, j + 1, em::bx2)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - b0[1] = c00 * (ONE - dx2_) + c10 * dx2_; - // Bx3 - c000 = INV_4 
* (EB(i - 1, j - 1, em::bx3) + EB(i - 1, j, em::bx3) + - EB(i, j - 1, em::bx3) + EB(i, j, em::bx3)); - c100 = INV_4 * (EB(i, j - 1, em::bx3) + EB(i, j, em::bx3) + - EB(i + 1, j - 1, em::bx3) + EB(i + 1, j, em::bx3)); - c010 = INV_4 * (EB(i - 1, j, em::bx3) + EB(i - 1, j + 1, em::bx3) + - EB(i, j, em::bx3) + EB(i, j + 1, em::bx3)); - c110 = INV_4 * (EB(i, j, em::bx3) + EB(i, j + 1, em::bx3) + - EB(i + 1, j, em::bx3) + EB(i + 1, j + 1, em::bx3)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - b0[2] = c00 * (ONE - dx2_) + c10 * dx2_; - } else if constexpr (D == Dim::_3D) { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const int j { i2(p) + static_cast(N_GHOSTS) }; - const int k { i3(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - const auto dx2_ { static_cast(dx2(p)) }; - const auto dx3_ { static_cast(dx3(p)) }; - - // first order - real_t c000, c100, c010, c110, c001, c101, c011, c111, c00, c10, c01, - c11, c0, c1; - - // Ex1 - // interpolate to nodes - c000 = HALF * (EB(i, j, k, em::ex1) + EB(i - 1, j, k, em::ex1)); - c100 = HALF * (EB(i, j, k, em::ex1) + EB(i + 1, j, k, em::ex1)); - c010 = HALF * (EB(i, j + 1, k, em::ex1) + EB(i - 1, j + 1, k, em::ex1)); - c110 = HALF * (EB(i, j + 1, k, em::ex1) + EB(i + 1, j + 1, k, em::ex1)); - // interpolate from nodes to the particle position - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - // interpolate to nodes - c001 = HALF * (EB(i, j, k + 1, em::ex1) + EB(i - 1, j, k + 1, em::ex1)); - c101 = HALF * (EB(i, j, k + 1, em::ex1) + EB(i + 1, j, k + 1, em::ex1)); - c011 = HALF * - (EB(i, j + 1, k + 1, em::ex1) + EB(i - 1, j + 1, k + 1, em::ex1)); - c111 = HALF * - (EB(i, j + 1, k + 1, em::ex1) + EB(i + 1, j + 1, k + 1, em::ex1)); - // interpolate from nodes to the particle position - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c1 = c01 * 
(ONE - dx2_) + c11 * dx2_; - e0[0] = c0 * (ONE - dx3_) + c1 * dx3_; - - // Ex2 - c000 = HALF * (EB(i, j, k, em::ex2) + EB(i, j - 1, k, em::ex2)); - c100 = HALF * (EB(i + 1, j, k, em::ex2) + EB(i + 1, j - 1, k, em::ex2)); - c010 = HALF * (EB(i, j, k, em::ex2) + EB(i, j + 1, k, em::ex2)); - c110 = HALF * (EB(i + 1, j, k, em::ex2) + EB(i + 1, j + 1, k, em::ex2)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - c001 = HALF * (EB(i, j, k + 1, em::ex2) + EB(i, j - 1, k + 1, em::ex2)); - c101 = HALF * - (EB(i + 1, j, k + 1, em::ex2) + EB(i + 1, j - 1, k + 1, em::ex2)); - c011 = HALF * (EB(i, j, k + 1, em::ex2) + EB(i, j + 1, k + 1, em::ex2)); - c111 = HALF * - (EB(i + 1, j, k + 1, em::ex2) + EB(i + 1, j + 1, k + 1, em::ex2)); - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - e0[1] = c0 * (ONE - dx3_) + c1 * dx3_; - - // Ex3 - c000 = HALF * (EB(i, j, k, em::ex3) + EB(i, j, k - 1, em::ex3)); - c100 = HALF * (EB(i + 1, j, k, em::ex3) + EB(i + 1, j, k - 1, em::ex3)); - c010 = HALF * (EB(i, j + 1, k, em::ex3) + EB(i, j + 1, k - 1, em::ex3)); - c110 = HALF * - (EB(i + 1, j + 1, k, em::ex3) + EB(i + 1, j + 1, k - 1, em::ex3)); - c001 = HALF * (EB(i, j, k, em::ex3) + EB(i, j, k + 1, em::ex3)); - c101 = HALF * (EB(i + 1, j, k, em::ex3) + EB(i + 1, j, k + 1, em::ex3)); - c011 = HALF * (EB(i, j + 1, k, em::ex3) + EB(i, j + 1, k + 1, em::ex3)); - c111 = HALF * - (EB(i + 1, j + 1, k, em::ex3) + EB(i + 1, j + 1, k + 1, em::ex3)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - e0[2] = c0 * (ONE - dx3_) + c1 * dx3_; - - // Bx1 - c000 = INV_4 * (EB(i, j, k, em::bx1) + EB(i, j - 1, k, em::bx1) + - EB(i, j, k - 1, em::bx1) + EB(i, j 
- 1, k - 1, em::bx1)); - c100 = INV_4 * - (EB(i + 1, j, k, em::bx1) + EB(i + 1, j - 1, k, em::bx1) + - EB(i + 1, j, k - 1, em::bx1) + EB(i + 1, j - 1, k - 1, em::bx1)); - c001 = INV_4 * (EB(i, j, k, em::bx1) + EB(i, j, k + 1, em::bx1) + - EB(i, j - 1, k, em::bx1) + EB(i, j - 1, k + 1, em::bx1)); - c101 = INV_4 * - (EB(i + 1, j, k, em::bx1) + EB(i + 1, j, k + 1, em::bx1) + - EB(i + 1, j - 1, k, em::bx1) + EB(i + 1, j - 1, k + 1, em::bx1)); - c010 = INV_4 * (EB(i, j, k, em::bx1) + EB(i, j + 1, k, em::bx1) + - EB(i, j, k - 1, em::bx1) + EB(i, j + 1, k - 1, em::bx1)); - c110 = INV_4 * - (EB(i + 1, j, k, em::bx1) + EB(i + 1, j, k - 1, em::bx1) + - EB(i + 1, j + 1, k - 1, em::bx1) + EB(i + 1, j + 1, k, em::bx1)); - c011 = INV_4 * (EB(i, j, k, em::bx1) + EB(i, j + 1, k, em::bx1) + - EB(i, j + 1, k + 1, em::bx1) + EB(i, j, k + 1, em::bx1)); - c111 = INV_4 * - (EB(i + 1, j, k, em::bx1) + EB(i + 1, j + 1, k, em::bx1) + - EB(i + 1, j + 1, k + 1, em::bx1) + EB(i + 1, j, k + 1, em::bx1)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - b0[0] = c0 * (ONE - dx3_) + c1 * dx3_; - - // Bx2 - c000 = INV_4 * (EB(i - 1, j, k - 1, em::bx2) + EB(i - 1, j, k, em::bx2) + - EB(i, j, k - 1, em::bx2) + EB(i, j, k, em::bx2)); - c100 = INV_4 * (EB(i, j, k - 1, em::bx2) + EB(i, j, k, em::bx2) + - EB(i + 1, j, k - 1, em::bx2) + EB(i + 1, j, k, em::bx2)); - c001 = INV_4 * (EB(i - 1, j, k, em::bx2) + EB(i - 1, j, k + 1, em::bx2) + - EB(i, j, k, em::bx2) + EB(i, j, k + 1, em::bx2)); - c101 = INV_4 * (EB(i, j, k, em::bx2) + EB(i, j, k + 1, em::bx2) + - EB(i + 1, j, k, em::bx2) + EB(i + 1, j, k + 1, em::bx2)); - c010 = INV_4 * - (EB(i - 1, j + 1, k - 1, em::bx2) + EB(i - 1, j + 1, k, em::bx2) + - EB(i, j + 1, k - 1, em::bx2) + EB(i, j + 1, k, em::bx2)); - c110 = INV_4 * - (EB(i, j + 1, k - 1, 
em::bx2) + EB(i, j + 1, k, em::bx2) + - EB(i + 1, j + 1, k - 1, em::bx2) + EB(i + 1, j + 1, k, em::bx2)); - c011 = INV_4 * - (EB(i - 1, j + 1, k, em::bx2) + EB(i - 1, j + 1, k + 1, em::bx2) + - EB(i, j + 1, k, em::bx2) + EB(i, j + 1, k + 1, em::bx2)); - c111 = INV_4 * - (EB(i, j + 1, k, em::bx2) + EB(i, j + 1, k + 1, em::bx2) + - EB(i + 1, j + 1, k, em::bx2) + EB(i + 1, j + 1, k + 1, em::bx2)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - b0[1] = c0 * (ONE - dx3_) + c1 * dx3_; - - // Bx3 - c000 = INV_4 * (EB(i - 1, j - 1, k, em::bx3) + EB(i - 1, j, k, em::bx3) + - EB(i, j - 1, k, em::bx3) + EB(i, j, k, em::bx3)); - c100 = INV_4 * (EB(i, j - 1, k, em::bx3) + EB(i, j, k, em::bx3) + - EB(i + 1, j - 1, k, em::bx3) + EB(i + 1, j, k, em::bx3)); - c001 = INV_4 * - (EB(i - 1, j - 1, k + 1, em::bx3) + EB(i - 1, j, k + 1, em::bx3) + - EB(i, j - 1, k + 1, em::bx3) + EB(i, j, k + 1, em::bx3)); - c101 = INV_4 * - (EB(i, j - 1, k + 1, em::bx3) + EB(i, j, k + 1, em::bx3) + - EB(i + 1, j - 1, k + 1, em::bx3) + EB(i + 1, j, k + 1, em::bx3)); - c010 = INV_4 * (EB(i - 1, j, k, em::bx3) + EB(i - 1, j + 1, k, em::bx3) + - EB(i, j, k, em::bx3) + EB(i, j + 1, k, em::bx3)); - c110 = INV_4 * (EB(i, j, k, em::bx3) + EB(i, j + 1, k, em::bx3) + - EB(i + 1, j, k, em::bx3) + EB(i + 1, j + 1, k, em::bx3)); - c011 = INV_4 * - (EB(i - 1, j, k + 1, em::bx3) + EB(i - 1, j + 1, k + 1, em::bx3) + - EB(i, j, k + 1, em::bx3) + EB(i, j + 1, k + 1, em::bx3)); - c111 = INV_4 * - (EB(i, j, k + 1, em::bx3) + EB(i, j + 1, k + 1, em::bx3) + - EB(i + 1, j, k + 1, em::bx3) + EB(i + 1, j + 1, k + 1, em::bx3)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c0 = c00 * (ONE - dx2_) + 
c10 * dx2_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - b0[2] = c0 * (ONE - dx3_) + c1 * dx3_; - } - } - - // Extra - Inline void boundaryConditions(index_t& p) const { - if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { - if (i1(p) < 0) { - if (is_periodic_i1min) { - i1(p) += ni1; - i1_prev(p) += ni1; - } else if (is_absorb_i1min) { - tag(p) = ParticleTag::dead; - } - } else if (i1(p) >= ni1) { - if (is_periodic_i1max) { - i1(p) -= ni1; - i1_prev(p) -= ni1; - } else if (is_absorb_i1max) { - tag(p) = ParticleTag::dead; - } - } - } - if constexpr (D == Dim::_2D || D == Dim::_3D) { - if (i2(p) < 0) { - if (is_periodic_i2min) { - i2(p) += ni2; - i2_prev(p) += ni2; - } else if (is_absorb_i2min) { - tag(p) = ParticleTag::dead; - } else if (is_axis_i2min) { - i2(p) = 0; - dx2(p) = ONE - dx2(p); - } - } else if (i2(p) >= ni2) { - if (is_periodic_i2max) { - i2(p) -= ni2; - i2_prev(p) -= ni2; - } else if (is_absorb_i2max) { - tag(p) = ParticleTag::dead; - } else if (is_axis_i2max) { - i2(p) = ni2 - 1; - dx2(p) = ONE - dx2(p); - } - } - } - if constexpr (D == Dim::_3D) { - if (i3(p) < 0) { - if (is_periodic_i3min) { - i3(p) += ni3; - i3_prev(p) += ni3; - } else if (is_absorb_i3min) { - tag(p) = ParticleTag::dead; - } - } else if (i3(p) >= ni3) { - if (is_periodic_i3max) { - i3(p) -= ni3; - i3_prev(p) -= ni3; - } else if (is_absorb_i3max) { - tag(p) = ParticleTag::dead; - } - } - } - } - }; - -} // namespace kernel::sr - -#undef from_Xi_to_i_di -#undef from_Xi_to_i -#undef i_di_to_Xi - -#endif // KERNELS_PARTICLE_PUSHER_SR_HPP diff --git a/legacy/src/pic/boundaries/currents_bc.cpp b/legacy/src/pic/boundaries/currents_bc.cpp deleted file mode 100644 index 012ad85c7..000000000 --- a/legacy/src/pic/boundaries/currents_bc.cpp +++ /dev/null @@ -1,84 +0,0 @@ -/** - * @file fields_bc.cpp - * @brief Absorbing boundary conditions for the currents at rmax (for 2D axisymmetric). 
- * @implements: `CurrentsBoundaryConditions` method of the `PIC` class - * @includes: `currents_bc.hpp - * @depends: `pic.h` - * - * @notes: - Periodic boundary conditions are implemented in `currents_exch.cpp` - * - */ - -#include "wrapper.h" - -#include "pic.h" - -#include "meshblock/meshblock.h" - -#ifndef MINKOWSKI_METRIC - #include "currents_bc.hpp" -#endif - -namespace ntt { - /** - * @brief Special boundary conditions on currents - */ -#ifdef MINKOWSKI_METRIC - template - void PIC::CurrentsBoundaryConditions() {} - -#else - - template <> - void PIC::CurrentsBoundaryConditions() { - // auto& mblock = this->meshblock; - // if (mblock.boundaries[0][1] == BoundaryCondition::ABSORB) { - // auto& pgen = this->problem_generator; - // auto params = *(this->params()); - // auto r_absorb = params.metricParameters()[2]; - // auto r_max = mblock.metric.x1_max; - // const auto i1_absorb = (std::size_t)(mblock.metric.x1_Sph2Code(r_absorb)); - // NTTHostErrorIf(i1_absorb >= mblock.i1_max(), - // "Absorbing layer is too small, consider " - // "increasing r_absorb"); - // /** - // * . . . . . . . . . . . . . - // * . . - // * . . - // * . ^= = = = = = = =^ . - // * . |* * * * * * * *\ . - // * . |* * * * * * * *\ . - // * . | \ . - // * . | \ . - // * . ^- - - - - - - -^ . - // * . . - // * . . - // * . . . . . . . . . . . . . 
- // * - // */ - // Kokkos::parallel_for( - // "CurrentsBoundaryConditions", - // CreateRangePolicy({ i1_absorb, 0 }, - // { mblock.i1_max(), mblock.i2_max() }), - // AbsorbCurrents_kernel(mblock, pgen, r_absorb, r_max)); - // } - } - - template <> - void PIC::CurrentsBoundaryConditions() { - NTTHostError("not applicable"); - } - - template <> - void PIC::CurrentsBoundaryConditions() { - NTTHostError("not implemented"); - } -#endif - -} // namespace ntt - -#ifdef MINKOWSKI_METRIC -template void ntt::PIC::CurrentsBoundaryConditions(); -template void ntt::PIC::CurrentsBoundaryConditions(); -template void ntt::PIC::CurrentsBoundaryConditions(); -#endif \ No newline at end of file diff --git a/legacy/src/pic/boundaries/currents_bc.hpp b/legacy/src/pic/boundaries/currents_bc.hpp deleted file mode 100644 index 19026bcfd..000000000 --- a/legacy/src/pic/boundaries/currents_bc.hpp +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef PIC_CURRENTS_BC_H -#define PIC_CURRENTS_BC_H - -#include "wrapper.h" - -#include "field_macros.h" -#include "pic.h" - -#include PGEN_HEADER - -namespace ntt { - // /** - // * @brief Algorithms for PIC current boundary conditions. - // * @tparam D Dimension. - // */ - // template - // class AbsorbCurrents_kernel { - // Meshblock m_mblock; - // ProblemGenerator m_pgen; - // real_t m_rabsorb; - // real_t m_rmax; - - // public: - // /** - // * @brief Constructor. - // * @param mblock Meshblock. - // * @param pgen Problem generator. - // * @param rabsorb Absorbing radius. - // * @param rmax Maximum radius. - // */ - // AbsorbCurrents_kernel(const Meshblock& mblock, - // const ProblemGenerator& pgen, - // real_t r_absorb, - // real_t r_max) : - // m_mblock { mblock }, - // m_pgen { pgen }, - // m_rabsorb { r_absorb }, - // m_rmax { r_max } {} - - // /** - // * @brief 2D implementation of the algorithm. - // * @param i1 index. - // * @param i2 index. 
- // */ - // Inline void operator()(index_t, index_t) const; - // }; - - // template <> - // Inline void AbsorbCurrents_kernel::operator()(index_t i, index_t j) const { - // const real_t i_ { static_cast(static_cast(i) - N_GHOSTS) }; - // const real_t j_ { static_cast(static_cast(j) - N_GHOSTS) }; - - // const real_t i1 - // // vec_t rth_; - // // m_mblock.metric.x_Code2Sph({ i_, j_, ZERO }, rth_); - // real_t delta_r1 { (rth_[0] - m_rabsorb) / (m_rmax - m_rabsorb) }; - // real_t sigma_r1 { HEAVISIDE(delta_r1) * delta_r1 * delta_r1 * delta_r1 }; - - // JX1(i, j) = (ONE - sigma_r1) * JX1(i, j); - // JX2(i, j) = (ONE - sigma_r1) * JX2(i, j); - // JX3(i, j) = (ONE - sigma_r1) * JX3(i, j); - // } -} // namespace ntt - -#endif diff --git a/legacy/src/pic/fields/ampere_curv.hpp b/legacy/src/pic/fields/ampere_curv.hpp deleted file mode 100644 index 12cf9cdf2..000000000 --- a/legacy/src/pic/fields/ampere_curv.hpp +++ /dev/null @@ -1,133 +0,0 @@ -#ifndef PIC_AMPERE_CURVILINEAR_H -#define PIC_AMPERE_CURVILINEAR_H - -#include "wrapper.h" - -#include "field_macros.h" -#include "pic.h" - -#include "io/output.h" -#include "meshblock/meshblock.h" - -namespace ntt { - /** - * @brief Algorithm for the Ampere's law: `dE/dt = curl B` in curvilinear space. - * @tparam D Dimension. - */ - template - class Ampere_kernel { - Meshblock m_mblock; - real_t m_coeff; - - public: - /** - * @brief Constructor. - * @param mblock Meshblock. - * @param coeff Coefficient to be multiplied by dE/dt = coeff * curl B. - */ - Ampere_kernel(const Meshblock& mblock, const real_t& coeff) : - m_mblock(mblock), - m_coeff(coeff) {} - - /** - * @brief 2D version of the algorithm. - * @param i1 index. - * @param i2 index. - */ - Inline void operator()(index_t i1, index_t i2) const; - /** - * @brief 3D version of the algorithm. - * @param i1 index. - * @param i2 index. - * @param i3 index. 
- */ - Inline void operator()(index_t i1, index_t i2, index_t i3) const; - }; - - template <> - Inline void Ampere_kernel::operator()(index_t i, index_t j) const { - real_t i_ { static_cast(static_cast(i) - N_GHOSTS) }; - real_t j_ { static_cast(static_cast(j) - N_GHOSTS) }; - - real_t inv_sqrt_detH_ij { ONE / m_mblock.metric.sqrt_det_h({ i_, j_ }) }; - real_t inv_sqrt_detH_iPj { ONE / m_mblock.metric.sqrt_det_h({ i_ + HALF, j_ }) }; - real_t inv_sqrt_detH_ijP { ONE / m_mblock.metric.sqrt_det_h({ i_, j_ + HALF }) }; - real_t h1_ijM { m_mblock.metric.h_11({ i_, j_ - HALF }) }; - real_t h1_ijP { m_mblock.metric.h_11({ i_, j_ + HALF }) }; - real_t h2_iPj { m_mblock.metric.h_22({ i_ + HALF, j_ }) }; - real_t h2_iMj { m_mblock.metric.h_22({ i_ - HALF, j_ }) }; - real_t h3_iMjP { m_mblock.metric.h_33({ i_ - HALF, j_ + HALF }) }; - real_t h3_iPjM { m_mblock.metric.h_33({ i_ + HALF, j_ - HALF }) }; - real_t h3_iPjP { m_mblock.metric.h_33({ i_ + HALF, j_ + HALF }) }; - - EX1(i, j) += m_coeff * inv_sqrt_detH_iPj * - (h3_iPjP * BX3(i, j) - h3_iPjM * BX3(i, j - 1)); - EX2(i, j) += m_coeff * inv_sqrt_detH_ijP * - (h3_iMjP * BX3(i - 1, j) - h3_iPjP * BX3(i, j)); - EX3(i, j) += m_coeff * inv_sqrt_detH_ij * - (h1_ijM * BX1(i, j - 1) - h1_ijP * BX1(i, j) + - h2_iPj * BX2(i, j) - h2_iMj * BX2(i - 1, j)); - } - - template <> - Inline void Ampere_kernel::operator()(index_t, index_t, index_t) const { - // 3d curvilinear ampere not implemented - } - - /** - * @brief Algorithm for the Ampere's law: `dE/dt = curl B` in curvilinear - * space near the polar axes (integral form). - * @tparam D Dimension. - */ - template - class AmperePoles_kernel { - Meshblock m_mblock; - real_t m_coeff; - const std::size_t m_ni2; - - public: - /** - * @brief Constructor. - * @param mblock Meshblock. - * @param coeff Coefficient to be multiplied by dE/dt = coeff * curl B. 
- */ - AmperePoles_kernel(const Meshblock& mblock, const real_t& coeff) : - m_mblock(mblock), - m_coeff(coeff), - m_ni2(m_mblock.Ni2()) {} - - /** - * @brief Implementation of the algorithm. - * @param i radial index. - */ - Inline void operator()(index_t i) const; - }; - - template <> - Inline void AmperePoles_kernel::operator()(index_t i) const { - index_t j_min { N_GHOSTS }; - index_t j_max { m_ni2 + N_GHOSTS }; - - real_t i_ { static_cast(static_cast(i) - N_GHOSTS) }; - real_t j_max_ { static_cast(static_cast(j_max) - N_GHOSTS) }; - - real_t inv_polar_area_iPj { ONE / m_mblock.metric.polar_area(i_ + HALF) }; - real_t h3_min_iPjP { m_mblock.metric.h_33({ i_ + HALF, HALF }) }; - real_t h3_max_iPjM { m_mblock.metric.h_33({ i_ + HALF, j_max_ - HALF }) }; - - real_t inv_sqrt_detH_ijP { ONE / m_mblock.metric.sqrt_det_h({ i_, HALF }) }; - real_t h3_min_iMjP { m_mblock.metric.h_33({ i_ - HALF, HALF }) }; - - // theta = 0 - EX1(i, j_min) += inv_polar_area_iPj * m_coeff * (h3_min_iPjP * BX3(i, j_min)); - // theta = pi - EX1(i, j_max) -= inv_polar_area_iPj * m_coeff * (h3_max_iPjM * BX3(i, j_max)); - - // j = jmin + 1/2 - EX2(i, j_min) += inv_sqrt_detH_ijP * m_coeff * - (h3_min_iMjP * BX3(i - 1, j_min) - - h3_min_iPjP * BX3(i, j_min)); - } -} // namespace ntt - -#endif // NTT_AMPERE_KERNEL_HPP diff --git a/legacy/src/pic/fields/ampere_mink.hpp b/legacy/src/pic/fields/ampere_mink.hpp deleted file mode 100644 index 90bd5d518..000000000 --- a/legacy/src/pic/fields/ampere_mink.hpp +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef PIC_AMPERE_MINKOWSKI_H -#define PIC_AMPERE_MINKOWSKI_H - -#include "wrapper.h" - -#include "field_macros.h" -#include "pic.h" - -#include "io/output.h" -#include "meshblock/meshblock.h" - -namespace ntt { - /** - * @brief Algorithm for the Ampere's law: `dE/dt = curl B` in Minkowski space. - * @tparam D Dimension. - */ - template - class Ampere_kernel { - Meshblock m_mblock; - real_t m_coeff; - - public: - /** - * @brief Constructor. 
- * @param mblock Meshblock. - * @param coeff Coefficient to be multiplied by dE/dt = coeff * curl B. - */ - Ampere_kernel(const Meshblock& mblock, const real_t& coeff) : - m_mblock(mblock), - m_coeff(coeff) {} - - /** - * @brief 1D implementation of the algorithm. - * @param i1 index. - */ - Inline void operator()(index_t) const; - - /** - * @brief 2D implementation of the algorithm. - * @param i1 index. - * @param i2 index. - */ - Inline void operator()(index_t, index_t) const; - - /** - * @brief 3D implementation of the algorithm. - * @param i1 index. - * @param i2 index. - * @param i3 index. - */ - Inline void operator()(index_t, index_t, index_t) const; - }; - - template <> - Inline void Ampere_kernel::operator()(index_t i) const { - EX2(i) += m_coeff * (BX3(i - 1) - BX3(i)); - EX3(i) += m_coeff * (BX2(i) - BX2(i - 1)); - } - - template <> - Inline void Ampere_kernel::operator()(index_t i, index_t j) const { - EX1(i, j) += m_coeff * (BX3(i, j) - BX3(i, j - 1)); - EX2(i, j) += m_coeff * (BX3(i - 1, j) - BX3(i, j)); - EX3(i, j) += m_coeff * (BX1(i, j - 1) - BX1(i, j) + BX2(i, j) - BX2(i - 1, j)); - } - - template <> - Inline void Ampere_kernel::operator()(index_t i, index_t j, index_t k) const { - EX1(i, j, k) += m_coeff * (BX2(i, j, k - 1) - BX2(i, j, k) + BX3(i, j, k) - - BX3(i, j - 1, k)); - EX2(i, j, k) += m_coeff * (BX3(i - 1, j, k) - BX3(i, j, k) + BX1(i, j, k) - - BX1(i, j, k - 1)); - EX3(i, j, k) += m_coeff * (BX1(i, j - 1, k) - BX1(i, j, k) + BX2(i, j, k) - - BX2(i - 1, j, k)); - } -} // namespace ntt -#endif // PIC_AMPERE_MINKOWSKI_H \ No newline at end of file diff --git a/legacy/src/pic/fields/faraday_curv.hpp b/legacy/src/pic/fields/faraday_curv.hpp deleted file mode 100644 index 0b8880e69..000000000 --- a/legacy/src/pic/fields/faraday_curv.hpp +++ /dev/null @@ -1,87 +0,0 @@ -#ifndef PIC_FARADAY_CURVILINEAR_H -#define PIC_FARADAY_CURVILINEAR_H - -#include "wrapper.h" - -#include "field_macros.h" -#include "pic.h" - -#include "io/output.h" -#include 
"meshblock/meshblock.h" - -#include - -namespace ntt { - - /** - * @brief Algorithm for the Faraday's law: `dB/dt = -curl E` in Curvilinear - * space (diagonal metric). - * @tparam D Dimension. - */ - template - class Faraday_kernel { - Meshblock m_mblock; - real_t m_coeff; - - public: - /** - * @brief Constructor. - * @param mblock Meshblock. - * @param coeff Coefficient to be multiplied by dB/dt = coeff * -curl E. - */ - Faraday_kernel(const Meshblock& mblock, const real_t& coeff) : - m_mblock(mblock), - m_coeff(coeff) {} - - /** - * @brief 2D implementation of the algorithm. - * @param i1 index. - * @param i2 index. - */ - Inline void operator()(index_t, index_t) const; - /** - * @brief 3D implementation of the algorithm. - * @param i1 index. - * @param i2 index. - * @param i3 index. - */ - Inline void operator()(index_t, index_t, index_t) const; - }; - - template <> - Inline void Faraday_kernel::operator()(index_t i, index_t j) const { - real_t i_ { static_cast(static_cast(i) - N_GHOSTS) }; - real_t j_ { static_cast(static_cast(j) - N_GHOSTS) }; - - real_t inv_sqrt_detH_iPj { ONE / m_mblock.metric.sqrt_det_h({ i_ + HALF, j_ }) }; - real_t inv_sqrt_detH_ijP { ONE / m_mblock.metric.sqrt_det_h({ i_, j_ + HALF }) }; - real_t inv_sqrt_detH_iPjP { ONE / m_mblock.metric.sqrt_det_h( - { i_ + HALF, j_ + HALF }) }; - real_t h1_iPjP1 { m_mblock.metric.h_11({ i_ + HALF, j_ + ONE }) }; - real_t h1_iPj { m_mblock.metric.h_11({ i_ + HALF, j_ }) }; - real_t h2_iP1jP { m_mblock.metric.h_22({ i_ + ONE, j_ + HALF }) }; - real_t h2_ijP { m_mblock.metric.h_22({ i_, j_ + HALF }) }; - real_t h3_ij { m_mblock.metric.h_33({ i_, j_ }) }; - real_t h3_iP1j { m_mblock.metric.h_33({ i_ + ONE, j_ }) }; - real_t h3_ijP1 { m_mblock.metric.h_33({ i_, j_ + ONE }) }; - - BX1(i, j) += m_coeff * inv_sqrt_detH_ijP * - (h3_ij * EX3(i, j) - h3_ijP1 * EX3(i, j + 1)); - if (j == N_GHOSTS) { - BX2(i, j) =m_coeff * (EX3(i + 1, j) - EX3(i, j)); - } else { - BX2(i, j) += m_coeff * inv_sqrt_detH_iPj * - 
(h3_iP1j * EX3(i + 1, j) - h3_ij * EX3(i, j)); - } - BX3(i, j) += m_coeff * inv_sqrt_detH_iPjP * - (h1_iPjP1 * EX1(i, j + 1) - h1_iPj * EX1(i, j) + - h2_ijP * EX2(i, j) - h2_iP1jP * EX2(i + 1, j)); - } - - template <> - Inline void Faraday_kernel::operator()(index_t, index_t, index_t) const { - // 3d curvilinear faraday not implemented - } -} // namespace ntt - -#endif diff --git a/legacy/src/pic/fields/faraday_mink.hpp b/legacy/src/pic/fields/faraday_mink.hpp deleted file mode 100644 index cebf67f15..000000000 --- a/legacy/src/pic/fields/faraday_mink.hpp +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef PIC_FARADAY_MINKOWSKI_H -#define PIC_FARADAY_MINKOWSKI_H - -#include "wrapper.h" - -#include "field_macros.h" -#include "pic.h" - -#include "io/output.h" -#include "meshblock/meshblock.h" - -namespace ntt { - - /** - * @brief Algorithm for the Faraday's law: `dB/dt = -curl E` in Minkowski space. - * @tparam D Dimension. - */ - template - class Faraday_kernel { - Meshblock m_mblock; - real_t m_coeff; - - public: - /** - * @brief Constructor. - * @param mblock Meshblock. - * @param coeff Coefficient to be multiplied by dB/dt = coeff * -curl E. - */ - Faraday_kernel(const Meshblock& mblock, const real_t& coeff) : - m_mblock(mblock), - m_coeff(coeff) {} - - /** - * @brief 1D implementation of the algorithm. - * @param i1 index. - */ - Inline void operator()(index_t) const; - /** - * @brief 2D implementation of the algorithm. - * @param i1 index. - * @param i2 index. - */ - Inline void operator()(index_t, index_t) const; - /** - * @brief 3D implementation of the algorithm. - * @param i1 index. - * @param i2 index. - * @param i3 index. 
- */ - Inline void operator()(index_t, index_t, index_t) const; - }; - - template <> - Inline void Faraday_kernel::operator()(index_t i) const { - BX2(i) += m_coeff * (EX3(i + 1) - EX3(i)); - BX3(i) += m_coeff * (EX2(i) - EX2(i + 1)); - } - - template <> - Inline void Faraday_kernel::operator()(index_t i, index_t j) const { - BX1(i, j) += m_coeff * (EX3(i, j) - EX3(i, j + 1)); - BX2(i, j) += m_coeff * (EX3(i + 1, j) - EX3(i, j)); - BX3(i, j) += m_coeff * (EX1(i, j + 1) - EX1(i, j) + EX2(i, j) - EX2(i + 1, j)); - } - - template <> - Inline void Faraday_kernel::operator()(index_t i, index_t j, index_t k) const { - BX1(i, j, k) += m_coeff * (EX2(i, j, k + 1) - EX2(i, j, k) + EX3(i, j, k) - - EX3(i, j + 1, k)); - BX2(i, j, k) += m_coeff * (EX3(i + 1, j, k) - EX3(i, j, k) + EX1(i, j, k) - - EX1(i, j, k + 1)); - BX3(i, j, k) += m_coeff * (EX1(i, j + 1, k) - EX1(i, j, k) + EX2(i, j, k) - - EX2(i + 1, j, k)); - } -} // namespace ntt - -#endif diff --git a/legacy/src/pic/particles/particle_pusher.hpp b/legacy/src/pic/particles/particle_pusher.hpp deleted file mode 100644 index 7991a95a4..000000000 --- a/legacy/src/pic/particles/particle_pusher.hpp +++ /dev/null @@ -1,1093 +0,0 @@ -#ifndef PIC_PARTICLE_PUSHER_H -#define PIC_PARTICLE_PUSHER_H - -#include "utils/qmath.h" - -#include "io/output.h" -#include "meshblock/meshblock.h" -#include "meshblock/particles.h" -#include "pic.h" -#include "wrapper.h" -#include METRIC_HEADER - -#include - -#ifdef EXTERNAL_FORCE - #include PGEN_HEADER -#endif - -namespace ntt { - struct Boris_t {}; - - struct Vay_t {}; - - struct Photon_t {}; - - // struct Boris_GCA_t {}; - - // struct Vay_GCA_t {}; - - struct GCA_t {}; - - struct NoGCA_t {}; - - struct Massive_t {}; - - struct Massless_t {}; - - /** - * @brief Algorithm for the Particle pusher. - * @tparam D Dimension. 
- */ - template - class Pusher_kernel { - ndfield_t EB; - array_t i1, i2, i3; - array_t i1_prev, i2_prev, i3_prev; - array_t dx1, dx2, dx3; - array_t dx1_prev, dx2_prev, dx3_prev; - array_t ux1, ux2, ux3; - array_t phi; - array_t tag; - const Metric metric; - - const real_t time, coeff, dt; - const int ni1, ni2, ni3; - const real_t gca_larmor { 0.05 }, gca_EovrB_sqr { 0.81 }; - bool is_ax_i2min { false }, is_ax_i2max { false }; - bool is_absorb_i1min { false }, is_absorb_i1max { false }; - bool is_absorb_i2min { false }, is_absorb_i2max { false }; - bool is_absorb_i3min { false }, is_absorb_i3max { false }; - bool is_periodic_i1min { false }, is_periodic_i1max { false }; - bool is_periodic_i2min { false }, is_periodic_i2max { false }; - bool is_periodic_i3min { false }, is_periodic_i3max { false }; - -#ifdef EXTERNAL_FORCE - ProblemGenerator pgen; -#endif - - public: - Pusher_kernel(Meshblock& mblock, - Particles& particles, - real_t time, - real_t coeff, - real_t dt, - ProblemGenerator& pgen) - : EB { mblock.em } - , i1 { particles.i1 } - , i2 { particles.i2 } - , i3 { particles.i3 } - , i1_prev { particles.i1_prev } - , i2_prev { particles.i2_prev } - , i3_prev { particles.i3_prev } - , dx1 { particles.dx1 } - , dx2 { particles.dx2 } - , dx3 { particles.dx3 } - , dx1_prev { particles.dx1_prev } - , dx2_prev { particles.dx2_prev } - , dx3_prev { particles.dx3_prev } - , ux1 { particles.ux1 } - , ux2 { particles.ux2 } - , ux3 { particles.ux3 } - , phi { particles.phi } - , tag { particles.tag } - , metric { mblock.metric } - , time { time } - , coeff { coeff } - , dt { dt } - , ni1 { (int)mblock.Ni1() } - , ni2 { (int)mblock.Ni2() } - , ni3 { (int)mblock.Ni3() } -#ifdef EXTERNAL_FORCE - , pgen { pgen } -#endif - { - (void)pgen; - NTTHostErrorIf(mblock.boundaries.size() < 1, - "boundaries defined incorrectly"); - is_absorb_i1min = (mblock.boundaries[0][0] == BoundaryCondition::OPEN) || - (mblock.boundaries[0][0] == BoundaryCondition::CUSTOM) || - 
(mblock.boundaries[0][0] == BoundaryCondition::ABSORB); - is_absorb_i1max = (mblock.boundaries[0][1] == BoundaryCondition::OPEN) || - (mblock.boundaries[0][1] == BoundaryCondition::CUSTOM) || - (mblock.boundaries[0][1] == BoundaryCondition::ABSORB); - is_periodic_i1min = (mblock.boundaries[0][0] == BoundaryCondition::PERIODIC); - is_periodic_i1max = (mblock.boundaries[0][1] == BoundaryCondition::PERIODIC); - if constexpr ((D == Dim2) || (D == Dim3)) { - NTTHostErrorIf(mblock.boundaries.size() < 2, - "boundaries defined incorrectly"); - is_absorb_i2min = (mblock.boundaries[1][0] == BoundaryCondition::OPEN) || - (mblock.boundaries[1][0] == BoundaryCondition::CUSTOM) || - (mblock.boundaries[1][0] == BoundaryCondition::ABSORB); - is_absorb_i2max = (mblock.boundaries[1][1] == BoundaryCondition::OPEN) || - (mblock.boundaries[1][1] == BoundaryCondition::CUSTOM) || - (mblock.boundaries[1][1] == BoundaryCondition::ABSORB); - is_ax_i2min = (mblock.boundaries[1][0] == BoundaryCondition::AXIS); - is_ax_i2max = (mblock.boundaries[1][1] == BoundaryCondition::AXIS); - is_periodic_i2min = (mblock.boundaries[1][0] == BoundaryCondition::PERIODIC); - is_periodic_i2max = (mblock.boundaries[1][1] == BoundaryCondition::PERIODIC); - } - if constexpr (D == Dim3) { - NTTHostErrorIf(mblock.boundaries.size() < 3, - "boundaries defined incorrectly"); - is_absorb_i3min = (mblock.boundaries[2][0] == BoundaryCondition::OPEN) || - (mblock.boundaries[2][0] == BoundaryCondition::CUSTOM) || - (mblock.boundaries[2][0] == BoundaryCondition::ABSORB); - is_absorb_i3max = (mblock.boundaries[2][1] == BoundaryCondition::OPEN) || - (mblock.boundaries[2][1] == BoundaryCondition::CUSTOM) || - (mblock.boundaries[2][1] == BoundaryCondition::ABSORB); - is_periodic_i3min = (mblock.boundaries[2][0] == BoundaryCondition::PERIODIC); - is_periodic_i3max = (mblock.boundaries[2][1] == BoundaryCondition::PERIODIC); - } - } - - template - Inline void operator()(P, G, M, index_t) const; - // Inline void 
operator()(Boris_t, index_t) const; - // Inline void operator()(Vay_t, index_t) const; - // Inline void operator()(Boris_GCA_t, index_t) const; - // Inline void operator()(Vay_GCA_t, index_t) const; - // Inline void operator()(Photon_t, index_t) const; - - // Updaters - - /** - * @brief update particle velocities - * @param P pusher algorithm - * @param G GCA tag - * @param M massive tag - * @param p, e0, b0 index & interpolated fields - */ - Inline void velUpd(Boris_t, index_t&, vec_t&, vec_t&) const; - Inline void velUpd(Vay_t, Massive_t, index_t&, vec_t&, vec_t&) const; - // Inline void velUpd(Boris_t, NoGCA_t, index_t&, vec_t&, vec_t&) const; - // Inline void velUpd(Vay_t, NoGCA_t, index_t&, vec_t&, vec_t&) const; - Inline void velUpd(Photon_t, index_t&, vec_t&, vec_t&) const; - - // Inline void velUpd(Vay_t, GCA_t, Massive_t, index_t&, vec_t&, vec_t&) const; - - // Inline void velUpd(Vay_t, index_t&, vec_t&, vec_t&) const; - - // Inline void velUpd(Photon_t, index_t&, vec_t&, vec_t&) const {} - - // #ifndef EXTERNAL_FORCE - // Inline void velUpd(GCA_t, index_t&, vec_t&, vec_t&) const; - // #else - // Inline void velUpd(GCA_t, index_t&, vec_t&, vec_t&, vec_t&) const; - // #endif - - /** - * @brief update particle positions with updated velocities - * @param p, v index & 3-velocity - */ - Inline void posUpd(index_t&, const vec_t&) const; - - /** - * @brief same as posUpd but per component - */ - Inline void posUpd_x1(index_t&, const real_t&) const; - Inline void posUpd_x2(index_t&, const real_t&) const; - Inline void posUpd_x3(index_t&, const real_t&) const; - - // /** - // * @brief apply boundary conditions - // */ - // Inline void boundaryConditions(index_t&) const; - // Inline void boundaryConditions_x1(index_t&) const; - // Inline void boundaryConditions_x2(index_t&) const; - // Inline void boundaryConditions_x3(index_t&) const; - - // Getters - Inline void getPrtlPos(index_t&, coord_t&) const; - - template - Inline void get3VelCntrv(T, index_t&, vec_t&, 
vec_t&) const; - - Inline auto getEnergy(Massive_t, index_t& p) const -> real_t; - - Inline auto getEnergy(Massless_t, index_t& p) const -> real_t; - - Inline void getInterpFlds(index_t&, vec_t&, vec_t&) const; - - // Extra - -#ifdef EXTERNAL_FORCE - #ifdef MINKOWSKI_METRIC - Inline void initForce(coord_t&, vec_t&) const; - #else - Inline void initForce(coord_t&, vec_t&) const; - #endif - Inline void forceHalfUpdate(index_t&, vec_t&) const; -#endif - }; - - template - void PushLoop(const SimulationParams& params, - Meshblock& mblock, - Particles& particles, - ProblemGenerator& pgen, - real_t time, - real_t factor) { - const auto dt = factor * mblock.timestep(); - const auto charge_ovr_mass = particles.mass() > ZERO - ? particles.charge() / particles.mass() - : ZERO; - const auto coeff = charge_ovr_mass * HALF * dt * params.B0(); - Kokkos::parallel_for( - "ParticlesPush", - Kokkos::RangePolicy(0, particles.npart()), - Pusher_kernel(mblock, particles, time, coeff, dt, pgen)); - } - - /** - * Definitions - */ - - template - template - Inline void Pusher_kernel::operator()(P, G, M, index_t p) const { - if (tag(p) == ParticleTag::alive) { - coord_t xp { ZERO }, xp_Cart; - getPrtlPos(p, xp); - metric.x_Code2Cart(xp, xp_Cart); - - vec_t ei { ZERO }, bi { ZERO }; - vec_t ei_Cart { ZERO }, bi_Cart { ZERO }; - getInterpFlds(p, ei, bi); - metric.v3_Cntrv2Cart(xp, ei, ei_Cart); - metric.v3_Cntrv2Cart(xp, bi, bi_Cart); - -#ifdef EXTERNAL_FORCE - vec_t force_Cart { ZERO }; - initForce(xp, force_Cart); - forceHalfUpdate(p, force_Cart); -#endif - velUpd(P {}, p, ei_Cart, bi_Cart); -#ifdef EXTERNAL_FORCE - forceHalfUpdate(p, force_Cart); -#endif - - // vec_t v { ZERO }; - // get3VelCntrv(Massive_t {}, p, xp, v); - // posUpd(p, v); - // boundaryConditions(p); - } - } - - // template - // Inline void Pusher_kernel::operator()(Photon_t, index_t p) const { - // if (tag(p) == ParticleTag::alive) { - // coord_t xp { ZERO }; - // vec_t v { ZERO }; - // getPrtlPos(p, xp); - // 
get3VelCntrv(Massless_t {}, p, xp, v); - // posUpd(p, v); - // boundaryConditions(p); - // } - // } - - // template - // Inline void Pusher_kernel::operator()(Boris_t, index_t p) const { - // if (tag(p) == ParticleTag::alive) { - // coord_t xp { ZERO }, xp_Cart; - // getPrtlPos(p, xp); - // metric.x_Code2Cart(xp, xp_Cart); - - // vec_t ei { ZERO }, bi { ZERO }; - // vec_t ei_Cart { ZERO }, bi_Cart { ZERO }; - // getInterpFlds(p, ei, bi); - // metric.v3_Cntrv2Cart(xp, ei, ei_Cart); - // metric.v3_Cntrv2Cart(xp, bi, bi_Cart); - - // #ifdef EXTERNAL_FORCE - // vec_t force_Cart { ZERO }; - // initForce(xp, force_Cart); - // forceHalfUpdate(p, force_Cart); - // #endif - // velUpd(Boris_t {}, p, ei_Cart, bi_Cart); - // #ifdef EXTERNAL_FORCE - // forceHalfUpdate(p, force_Cart); - // #endif - - // // vec_t v { ZERO }; - // // get3VelCntrv(Massive_t {}, p, xp, v); - // posUpd(p, v); - // // boundaryConditions(p); - // } - // } - - // template - // Inline void Pusher_kernel::operator()(Vay_t, index_t p) const { - // if (tag(p) == ParticleTag::alive) { - // coord_t xp { ZERO }; - // getPrtlPos(p, xp); - - // vec_t ei { ZERO }, bi { ZERO }; - // vec_t ei_Cart { ZERO }, bi_Cart { ZERO }; - // getInterpFlds(p, ei, bi); - // metric.v3_Cntrv2Cart(xp, ei, ei_Cart); - // metric.v3_Cntrv2Cart(xp, bi, bi_Cart); - - // #ifdef EXTERNAL_FORCE - // vec_t force_Cart { ZERO }; - // initForce(xp, force_Cart); - // forceHalfUpdate(p, force_Cart); - // #endif - // velUpd(Vay_t {}, p, ei_Cart, bi_Cart); - // #ifdef EXTERNAL_FORCE - // forceHalfUpdate(p, force_Cart); - // #endif - - // vec_t v { ZERO }; - // get3VelCntrv(Massive_t {}, p, xp, v); - // posUpd(p, v); - // boundaryConditions(p); - // } - // } - - // template - // Inline void Pusher_kernel::operator()(Boris_GCA_t, index_t p) const { - // if (tag(p) == ParticleTag::alive) { - // coord_t xp { ZERO }; - // getPrtlPos(p, xp); - - // vec_t ei { ZERO }, bi { ZERO }; - // vec_t ei_Cart { ZERO }, bi_Cart { ZERO }; - // getInterpFlds(p, ei, 
bi); - // metric.v3_Cntrv2Cart(xp, ei, ei_Cart); - // metric.v3_Cntrv2Cart(xp, bi, bi_Cart); - - // const auto E2 { NORM_SQR(ei_Cart[0], ei_Cart[1], ei_Cart[2]) }; - // const auto B2 { NORM_SQR(bi_Cart[0], bi_Cart[1], bi_Cart[2]) }; - // const auto rL { math::sqrt(ONE + NORM_SQR(ux1(p), ux2(p), ux3(p))) * dt / - // (TWO * coeff * math::sqrt(B2)) }; - // if (B2 > ZERO && rL < gca_larmor && (E2 / B2) < gca_EovrB_sqr) { - // #ifdef EXTERNAL_FORCE - // vec_t force_Cart { ZERO }; - // initForce(xp, force_Cart); - // velUpd(GCA_t {}, p, force_Cart, ei_Cart, bi_Cart); - // #else - // velUpd(GCA_t {}, p, ei_Cart, bi_Cart); - // #endif - // } else { - // #ifdef EXTERNAL_FORCE - // vec_t force_Cart { ZERO }; - // initForce(xp, force_Cart); - // forceHalfUpdate(p, force_Cart); - // #endif - // velUpd(Boris_t {}, p, ei_Cart, bi_Cart); - // #ifdef EXTERNAL_FORCE - // forceHalfUpdate(p, force_Cart); - // #endif - // } - - // vec_t v { ZERO }; - // get3VelCntrv(Massive_t {}, p, xp, v); - // posUpd(p, v); - // boundaryConditions(p); - // } - // } - - // template - // Inline void Pusher_kernel::operator()(Vay_GCA_t, index_t p) const { - // if (tag(p) == ParticleTag::alive) { - // coord_t xp { ZERO }; - // getPrtlPos(p, xp); - - // vec_t ei { ZERO }, bi { ZERO }; - // vec_t ei_Cart { ZERO }, bi_Cart { ZERO }; - // getInterpFlds(p, ei, bi); - // metric.v3_Cntrv2Cart(xp, ei, ei_Cart); - // metric.v3_Cntrv2Cart(xp, bi, bi_Cart); - - // const auto E2 { NORM_SQR(ei_Cart[0], ei_Cart[1], ei_Cart[2]) }; - // const auto B2 { NORM_SQR(bi_Cart[0], bi_Cart[1], bi_Cart[2]) }; - // const auto rL { math::sqrt(ONE + NORM_SQR(ux1(p), ux2(p), ux3(p))) * dt / - // (TWO * coeff * math::sqrt(B2)) }; - // if (rL < gca_larmor && (E2 / B2) < gca_EovrB_sqr) { - // #ifdef EXTERNAL_FORCE - // vec_t force_Cart { ZERO }; - // initForce(xp, force_Cart); - // velUpd(GCA_t {}, p, force_Cart, ei_Cart, bi_Cart); - // #else - // velUpd(GCA_t {}, p, ei_Cart, bi_Cart); - // #endif - // } else { - // #ifdef 
EXTERNAL_FORCE - // vec_t force_Cart { ZERO }; - // initForce(xp, force_Cart); - // forceHalfUpdate(p, force_Cart); - // #endif - // velUpd(Vay_t {}, p, ei_Cart, bi_Cart); - // #ifdef EXTERNAL_FORCE - // forceHalfUpdate(p, force_Cart); - // #endif - // } - - // vec_t v { ZERO }; - // get3VelCntrv(Massive_t {}, p, xp, v); - // posUpd(p, v); - // boundaryConditions(p); - // } - // } - - // Velocity update - - template - Inline void Pusher_kernel::velUpd(Boris_t, - index_t& p, - vec_t& e0, - vec_t& b0) const { - real_t COEFF { coeff }; - - e0[0] *= COEFF; - e0[1] *= COEFF; - e0[2] *= COEFF; - vec_t u0 { ux1(p) + e0[0], ux2(p) + e0[1], ux3(p) + e0[2] }; - - COEFF *= ONE / math::sqrt(ONE + NORM_SQR(u0[0], u0[1], u0[2])); - b0[0] *= COEFF; - b0[1] *= COEFF; - b0[2] *= COEFF; - COEFF = TWO / (ONE + NORM_SQR(b0[0], b0[1], b0[2])); - - vec_t u1 { - (u0[0] + CROSS_x1(u0[0], u0[1], u0[2], b0[0], b0[1], b0[2])) * COEFF, - (u0[1] + CROSS_x2(u0[0], u0[1], u0[2], b0[0], b0[1], b0[2])) * COEFF, - (u0[2] + CROSS_x3(u0[0], u0[1], u0[2], b0[0], b0[1], b0[2])) * COEFF - }; - - u0[0] += CROSS_x1(u1[0], u1[1], u1[2], b0[0], b0[1], b0[2]) + e0[0]; - u0[1] += CROSS_x2(u1[0], u1[1], u1[2], b0[0], b0[1], b0[2]) + e0[1]; - u0[2] += CROSS_x3(u1[0], u1[1], u1[2], b0[0], b0[1], b0[2]) + e0[2]; - - ux1(p) = u0[0]; - ux2(p) = u0[1]; - ux3(p) = u0[2]; - } - - template - Inline void Pusher_kernel::velUpd(Vay_t, - index_t& p, - vec_t& e0, - vec_t& b0) const { - auto COEFF { coeff }; - e0[0] *= COEFF; - e0[1] *= COEFF; - e0[2] *= COEFF; - - b0[0] *= COEFF; - b0[1] *= COEFF; - b0[2] *= COEFF; - - COEFF = ONE / math::sqrt(ONE + NORM_SQR(ux1(p), ux2(p), ux3(p))); - - vec_t u1 { - (ux1(p) + TWO * e0[0] + - CROSS_x1(ux1(p), ux2(p), ux3(p), b0[0], b0[1], b0[2]) * COEFF), - (ux2(p) + TWO * e0[1] + - CROSS_x2(ux1(p), ux2(p), ux3(p), b0[0], b0[1], b0[2]) * COEFF), - (ux3(p) + TWO * e0[2] + - CROSS_x3(ux1(p), ux2(p), ux3(p), b0[0], b0[1], b0[2]) * COEFF) - }; - COEFF = DOT(u1[0], u1[1], u1[2], b0[0], b0[1], 
b0[2]); - auto COEFF2 { ONE + NORM_SQR(u1[0], u1[1], u1[2]) - - NORM_SQR(b0[0], b0[1], b0[2]) }; - - COEFF = ONE / - math::sqrt( - INV_2 * (COEFF2 + math::sqrt(SQR(COEFF2) + - FOUR * (SQR(b0[0]) + SQR(b0[1]) + - SQR(b0[2]) + SQR(COEFF))))); - COEFF2 = ONE / - (ONE + SQR(b0[0] * COEFF) + SQR(b0[1] * COEFF) + SQR(b0[2] * COEFF)); - - ux1(p) = COEFF2 * (u1[0] + - COEFF * DOT(u1[0], u1[1], u1[2], b0[0], b0[1], b0[2]) * - (b0[0] * COEFF) + - u1[1] * b0[2] * COEFF - u1[2] * b0[1] * COEFF); - ux2(p) = COEFF2 * (u1[1] + - COEFF * DOT(u1[0], u1[1], u1[2], b0[0], b0[1], b0[2]) * - (b0[1] * COEFF) + - u1[2] * b0[0] * COEFF - u1[0] * b0[2] * COEFF); - ux3(p) = COEFF2 * (u1[2] + - COEFF * DOT(u1[0], u1[1], u1[2], b0[0], b0[1], b0[2]) * - (b0[2] * COEFF) + - u1[0] * b0[1] * COEFF - u1[1] * b0[0] * COEFF); - } - - // template - // Inline void Pusher_kernel::velUpd(GCA_t, - // index_t& p, - // #ifdef EXTERNAL_FORCE - // vec_t& f0, - // #endif - // vec_t& e0, - // vec_t& b0) const { - // const auto eb_sqr { NORM_SQR(e0[0], e0[1], e0[2]) + - // NORM_SQR(b0[0], b0[1], b0[2]) }; - - // const vec_t wE { - // CROSS_x1(e0[0], e0[1], e0[2], b0[0], b0[1], b0[2]) / eb_sqr, - // CROSS_x2(e0[0], e0[1], e0[2], b0[0], b0[1], b0[2]) / eb_sqr, - // CROSS_x3(e0[0], e0[1], e0[2], b0[0], b0[1], b0[2]) / eb_sqr - // }; - - // { - // const auto b_norm_inv { ONE / NORM(b0[0], b0[1], b0[2]) }; - // b0[0] *= b_norm_inv; - // b0[1] *= b_norm_inv; - // b0[2] *= b_norm_inv; - // } - // auto upar { DOT(ux1(p), ux2(p), ux3(p), b0[0], b0[1], b0[2]) + - // coeff * TWO * DOT(e0[0], e0[1], e0[2], b0[0], b0[1], b0[2]) }; - - // #ifdef EXTERNAL_FORCE - // upar += dt * DOT(f0[0], f0[1], f0[2], b0[0], b0[1], b0[2]); - // #endif - - // real_t factor; - // { - // const auto wE_sqr { NORM_SQR(wE[0], wE[1], wE[2]) }; - // if (wE_sqr < static_cast(0.01)) { - // factor = ONE + wE_sqr + TWO * SQR(wE_sqr) + FIVE * SQR(wE_sqr) * wE_sqr; - // } else { - // factor = (ONE - math::sqrt(ONE - FOUR * wE_sqr)) / (TWO * wE_sqr); - // 
} - // } - // const vec_t vE_Cart { wE[0] * factor, wE[1] * factor, wE[2] * factor }; - // const auto Gamma { math::sqrt(ONE + SQR(upar)) / - // math::sqrt( - // ONE - NORM_SQR(vE_Cart[0], vE_Cart[1], vE_Cart[2])) }; - // ux1(p) = upar * b0[0] + vE_Cart[0] * Gamma; - // ux2(p) = upar * b0[1] + vE_Cart[1] * Gamma; - // ux3(p) = upar * b0[2] + vE_Cart[2] * Gamma; - // } - - // Position update - template <> - Inline void Pusher_kernel::posUpd(index_t& p, const vec_t& v) const { - i1_prev(p) = i1(p); - dx1_prev(p) = dx1(p); - posUpd_x1(p, v[0]); - } - - template <> - Inline void Pusher_kernel::posUpd(index_t& p, const vec_t& v) const { - i1_prev(p) = i1(p); - dx1_prev(p) = dx1(p); - i2_prev(p) = i2(p); - dx2_prev(p) = dx2(p); - posUpd_x1(p, v[0]); - posUpd_x2(p, v[1]); -#ifndef MINKOWSKI_METRIC - phi(p) += dt * v[2]; -#endif - } - - template <> - Inline void Pusher_kernel::posUpd(index_t& p, const vec_t& v) const { - i1_prev(p) = i1(p); - dx1_prev(p) = dx1(p); - i2_prev(p) = i2(p); - dx2_prev(p) = dx2(p); - i3_prev(p) = i3(p); - dx3_prev(p) = dx3(p); - posUpd_x1(p, v[0]); - posUpd_x2(p, v[1]); - posUpd_x3(p, v[2]); - } - - // !TODO: check if SIGNf can be done better - template - Inline void Pusher_kernel::posUpd_x1(index_t& p, const real_t& vx1) const { - dx1(p) += static_cast(dt * vx1); - auto temp_i { static_cast(dx1(p)) }; - auto temp_r { math::fmax(SIGNf(dx1(p)) + temp_i, static_cast(temp_i)) - - static_cast(1.0) }; - temp_i = static_cast(temp_r); - i1(p) = i1(p) + temp_i; - dx1(p) = dx1(p) - temp_r; - } - - template - Inline void Pusher_kernel::posUpd_x2(index_t& p, const real_t& vx2) const { - dx2(p) += static_cast(dt * vx2); - auto temp_i { static_cast(dx2(p)) }; - auto temp_r { math::fmax(SIGNf(dx2(p)) + temp_i, static_cast(temp_i)) - - static_cast(1.0) }; - temp_i = static_cast(temp_r); - i2(p) = i2(p) + temp_i; - dx2(p) = dx2(p) - temp_r; - } - - template - Inline void Pusher_kernel::posUpd_x3(index_t& p, const real_t& vx3) const { - dx3(p) += static_cast(dt 
* vx3); - auto temp_i { static_cast(dx3(p)) }; - auto temp_r { math::fmax(SIGNf(dx3(p)) + temp_i, static_cast(temp_i)) - - static_cast(1.0) }; - temp_i = static_cast(temp_r); - i3(p) = i3(p) + temp_i; - dx3(p) = dx3(p) - temp_r; - } - - // Getters - template - template - Inline void Pusher_kernel::get3VelCntrv(T, - index_t& p, - vec_t& xp, - vec_t& v) const { - metric.v3_Cart2Cntrv(xp, { ux1(p), ux2(p), ux3(p) }, v); - auto inv_energy { ONE / getEnergy(T {}, p) }; - v[0] *= inv_energy; - v[1] *= inv_energy; - v[2] *= inv_energy; - } - -#ifdef MINKOWSKI_METRIC - template <> - Inline void Pusher_kernel::getPrtlPos(index_t& p, coord_t& xp) const { - xp[0] = static_cast(i1(p)) + static_cast(dx1(p)); - } - - template <> - Inline void Pusher_kernel::getPrtlPos(index_t& p, coord_t& xp) const { - xp[0] = static_cast(i1(p)) + static_cast(dx1(p)); - xp[1] = static_cast(i2(p)) + static_cast(dx2(p)); - } -#else - template <> - Inline void Pusher_kernel::getPrtlPos(index_t&, coord_t&) const { - NTTError("not applicable"); - } - - template <> - Inline void Pusher_kernel::getPrtlPos(index_t& p, - coord_t& xp) const { - xp[0] = static_cast(i1(p)) + static_cast(dx1(p)); - xp[1] = static_cast(i2(p)) + static_cast(dx2(p)); - xp[2] = phi(p); - } -#endif - - template <> - Inline void Pusher_kernel::getPrtlPos(index_t& p, coord_t& xp) const { - xp[0] = static_cast(i1(p)) + static_cast(dx1(p)); - xp[1] = static_cast(i2(p)) + static_cast(dx2(p)); - xp[2] = static_cast(i3(p)) + static_cast(dx3(p)); - } - - template - Inline auto Pusher_kernel::getEnergy(Massive_t, index_t& p) const -> real_t { - return math::sqrt(ONE + SQR(ux1(p)) + SQR(ux2(p)) + SQR(ux3(p))); - } - - template - Inline auto Pusher_kernel::getEnergy(Massless_t, index_t& p) const -> real_t { - return math::sqrt(SQR(ux1(p)) + SQR(ux2(p)) + SQR(ux3(p))); - } - - template <> - Inline void Pusher_kernel::getInterpFlds(index_t& p, - vec_t& e0, - vec_t& b0) const { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const auto dx1_ 
{ static_cast(dx1(p)) }; - - // first order - real_t c0, c1; - - // Ex1 - // interpolate to nodes - c0 = HALF * (EB(i, em::ex1) + EB(i - 1, em::ex1)); - c1 = HALF * (EB(i, em::ex1) + EB(i + 1, em::ex1)); - // interpolate from nodes to the particle position - e0[0] = c0 * (ONE - dx1_) + c1 * dx1_; - // Ex2 - c0 = EB(i, em::ex2); - c1 = EB(i + 1, em::ex2); - e0[1] = c0 * (ONE - dx1_) + c1 * dx1_; - // Ex3 - c0 = EB(i, em::ex3); - c1 = EB(i + 1, em::ex3); - e0[2] = c0 * (ONE - dx1_) + c1 * dx1_; - - // Bx1 - c0 = EB(i, em::bx1); - c1 = EB(i + 1, em::bx1); - b0[0] = c0 * (ONE - dx1_) + c1 * dx1_; - // Bx2 - c0 = HALF * (EB(i - 1, em::bx2) + EB(i, em::bx2)); - c1 = HALF * (EB(i, em::bx2) + EB(i + 1, em::bx2)); - b0[1] = c0 * (ONE - dx1_) + c1 * dx1_; - // Bx3 - c0 = HALF * (EB(i - 1, em::bx3) + EB(i, em::bx3)); - c1 = HALF * (EB(i, em::bx3) + EB(i + 1, em::bx3)); - b0[2] = c0 * (ONE - dx1_) + c1 * dx1_; - } - - template <> - Inline void Pusher_kernel::getInterpFlds(index_t& p, - vec_t& e0, - vec_t& b0) const { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const int j { i2(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - const auto dx2_ { static_cast(dx2(p)) }; - - // first order - real_t c000, c100, c010, c110, c00, c10; - - // Ex1 - // interpolate to nodes - c000 = HALF * (EB(i, j, em::ex1) + EB(i - 1, j, em::ex1)); - c100 = HALF * (EB(i, j, em::ex1) + EB(i + 1, j, em::ex1)); - c010 = HALF * (EB(i, j + 1, em::ex1) + EB(i - 1, j + 1, em::ex1)); - c110 = HALF * (EB(i, j + 1, em::ex1) + EB(i + 1, j + 1, em::ex1)); - // interpolate from nodes to the particle position - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - e0[0] = c00 * (ONE - dx2_) + c10 * dx2_; - // Ex2 - c000 = HALF * (EB(i, j, em::ex2) + EB(i, j - 1, em::ex2)); - c100 = HALF * (EB(i + 1, j, em::ex2) + EB(i + 1, j - 1, em::ex2)); - c010 = HALF * (EB(i, j, em::ex2) + EB(i, j + 1, em::ex2)); - c110 = HALF * (EB(i + 1, j, em::ex2) + EB(i + 1, j 
+ 1, em::ex2)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - e0[1] = c00 * (ONE - dx2_) + c10 * dx2_; - // Ex3 - c000 = EB(i, j, em::ex3); - c100 = EB(i + 1, j, em::ex3); - c010 = EB(i, j + 1, em::ex3); - c110 = EB(i + 1, j + 1, em::ex3); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - e0[2] = c00 * (ONE - dx2_) + c10 * dx2_; - - // Bx1 - c000 = HALF * (EB(i, j, em::bx1) + EB(i, j - 1, em::bx1)); - c100 = HALF * (EB(i + 1, j, em::bx1) + EB(i + 1, j - 1, em::bx1)); - c010 = HALF * (EB(i, j, em::bx1) + EB(i, j + 1, em::bx1)); - c110 = HALF * (EB(i + 1, j, em::bx1) + EB(i + 1, j + 1, em::bx1)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - b0[0] = c00 * (ONE - dx2_) + c10 * dx2_; - // Bx2 - c000 = HALF * (EB(i - 1, j, em::bx2) + EB(i, j, em::bx2)); - c100 = HALF * (EB(i, j, em::bx2) + EB(i + 1, j, em::bx2)); - c010 = HALF * (EB(i - 1, j + 1, em::bx2) + EB(i, j + 1, em::bx2)); - c110 = HALF * (EB(i, j + 1, em::bx2) + EB(i + 1, j + 1, em::bx2)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - b0[1] = c00 * (ONE - dx2_) + c10 * dx2_; - // Bx3 - c000 = INV_4 * (EB(i - 1, j - 1, em::bx3) + EB(i - 1, j, em::bx3) + - EB(i, j - 1, em::bx3) + EB(i, j, em::bx3)); - c100 = INV_4 * (EB(i, j - 1, em::bx3) + EB(i, j, em::bx3) + - EB(i + 1, j - 1, em::bx3) + EB(i + 1, j, em::bx3)); - c010 = INV_4 * (EB(i - 1, j, em::bx3) + EB(i - 1, j + 1, em::bx3) + - EB(i, j, em::bx3) + EB(i, j + 1, em::bx3)); - c110 = INV_4 * (EB(i, j, em::bx3) + EB(i, j + 1, em::bx3) + - EB(i + 1, j, em::bx3) + EB(i + 1, j + 1, em::bx3)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - b0[2] = c00 * (ONE - dx2_) + c10 * dx2_; - } - - template <> - Inline void Pusher_kernel::getInterpFlds(index_t& p, - vec_t& e0, - vec_t& b0) const { - const int i { i1(p) + static_cast(N_GHOSTS) }; - const int j { i2(p) + 
static_cast(N_GHOSTS) }; - const int k { i3(p) + static_cast(N_GHOSTS) }; - const auto dx1_ { static_cast(dx1(p)) }; - const auto dx2_ { static_cast(dx2(p)) }; - const auto dx3_ { static_cast(dx3(p)) }; - - // first order - real_t c000, c100, c010, c110, c001, c101, c011, c111, c00, c10, c01, c11, - c0, c1; - - // Ex1 - // interpolate to nodes - c000 = HALF * (EB(i, j, k, em::ex1) + EB(i - 1, j, k, em::ex1)); - c100 = HALF * (EB(i, j, k, em::ex1) + EB(i + 1, j, k, em::ex1)); - c010 = HALF * (EB(i, j + 1, k, em::ex1) + EB(i - 1, j + 1, k, em::ex1)); - c110 = HALF * (EB(i, j + 1, k, em::ex1) + EB(i + 1, j + 1, k, em::ex1)); - // interpolate from nodes to the particle position - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - // interpolate to nodes - c001 = HALF * (EB(i, j, k + 1, em::ex1) + EB(i - 1, j, k + 1, em::ex1)); - c101 = HALF * (EB(i, j, k + 1, em::ex1) + EB(i + 1, j, k + 1, em::ex1)); - c011 = HALF * - (EB(i, j + 1, k + 1, em::ex1) + EB(i - 1, j + 1, k + 1, em::ex1)); - c111 = HALF * - (EB(i, j + 1, k + 1, em::ex1) + EB(i + 1, j + 1, k + 1, em::ex1)); - // interpolate from nodes to the particle position - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - e0[0] = c0 * (ONE - dx3_) + c1 * dx3_; - - // Ex2 - c000 = HALF * (EB(i, j, k, em::ex2) + EB(i, j - 1, k, em::ex2)); - c100 = HALF * (EB(i + 1, j, k, em::ex2) + EB(i + 1, j - 1, k, em::ex2)); - c010 = HALF * (EB(i, j, k, em::ex2) + EB(i, j + 1, k, em::ex2)); - c110 = HALF * (EB(i + 1, j, k, em::ex2) + EB(i + 1, j + 1, k, em::ex2)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - c001 = HALF * (EB(i, j, k + 1, em::ex2) + EB(i, j - 1, k + 1, em::ex2)); - c101 = HALF * - (EB(i + 1, j, k + 1, em::ex2) + EB(i + 1, j - 1, k + 1, em::ex2)); - c011 = HALF * (EB(i, j, k + 1, em::ex2) + 
EB(i, j + 1, k + 1, em::ex2)); - c111 = HALF * - (EB(i + 1, j, k + 1, em::ex2) + EB(i + 1, j + 1, k + 1, em::ex2)); - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - e0[1] = c0 * (ONE - dx3_) + c1 * dx3_; - - // Ex3 - c000 = HALF * (EB(i, j, k, em::ex3) + EB(i, j, k - 1, em::ex3)); - c100 = HALF * (EB(i + 1, j, k, em::ex3) + EB(i + 1, j, k - 1, em::ex3)); - c010 = HALF * (EB(i, j + 1, k, em::ex3) + EB(i, j + 1, k - 1, em::ex3)); - c110 = HALF * - (EB(i + 1, j + 1, k, em::ex3) + EB(i + 1, j + 1, k - 1, em::ex3)); - c001 = HALF * (EB(i, j, k, em::ex3) + EB(i, j, k + 1, em::ex3)); - c101 = HALF * (EB(i + 1, j, k, em::ex3) + EB(i + 1, j, k + 1, em::ex3)); - c011 = HALF * (EB(i, j + 1, k, em::ex3) + EB(i, j + 1, k + 1, em::ex3)); - c111 = HALF * - (EB(i + 1, j + 1, k, em::ex3) + EB(i + 1, j + 1, k + 1, em::ex3)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - e0[2] = c0 * (ONE - dx3_) + c1 * dx3_; - - // Bx1 - c000 = INV_4 * (EB(i, j, k, em::bx1) + EB(i, j - 1, k, em::bx1) + - EB(i, j, k - 1, em::bx1) + EB(i, j - 1, k - 1, em::bx1)); - c100 = INV_4 * - (EB(i + 1, j, k, em::bx1) + EB(i + 1, j - 1, k, em::bx1) + - EB(i + 1, j, k - 1, em::bx1) + EB(i + 1, j - 1, k - 1, em::bx1)); - c001 = INV_4 * (EB(i, j, k, em::bx1) + EB(i, j, k + 1, em::bx1) + - EB(i, j - 1, k, em::bx1) + EB(i, j - 1, k + 1, em::bx1)); - c101 = INV_4 * - (EB(i + 1, j, k, em::bx1) + EB(i + 1, j, k + 1, em::bx1) + - EB(i + 1, j - 1, k, em::bx1) + EB(i + 1, j - 1, k + 1, em::bx1)); - c010 = INV_4 * (EB(i, j, k, em::bx1) + EB(i, j + 1, k, em::bx1) + - EB(i, j, k - 1, em::bx1) + EB(i, j + 1, k - 1, em::bx1)); - c110 = INV_4 * - (EB(i + 1, j, k, em::bx1) + EB(i + 1, j, k - 1, em::bx1) + - EB(i + 1, j + 1, k - 1, em::bx1) + EB(i + 
1, j + 1, k, em::bx1)); - c011 = INV_4 * (EB(i, j, k, em::bx1) + EB(i, j + 1, k, em::bx1) + - EB(i, j + 1, k + 1, em::bx1) + EB(i, j, k + 1, em::bx1)); - c111 = INV_4 * - (EB(i + 1, j, k, em::bx1) + EB(i + 1, j + 1, k, em::bx1) + - EB(i + 1, j + 1, k + 1, em::bx1) + EB(i + 1, j, k + 1, em::bx1)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - b0[0] = c0 * (ONE - dx3_) + c1 * dx3_; - - // Bx2 - c000 = INV_4 * (EB(i - 1, j, k - 1, em::bx2) + EB(i - 1, j, k, em::bx2) + - EB(i, j, k - 1, em::bx2) + EB(i, j, k, em::bx2)); - c100 = INV_4 * (EB(i, j, k - 1, em::bx2) + EB(i, j, k, em::bx2) + - EB(i + 1, j, k - 1, em::bx2) + EB(i + 1, j, k, em::bx2)); - c001 = INV_4 * (EB(i - 1, j, k, em::bx2) + EB(i - 1, j, k + 1, em::bx2) + - EB(i, j, k, em::bx2) + EB(i, j, k + 1, em::bx2)); - c101 = INV_4 * (EB(i, j, k, em::bx2) + EB(i, j, k + 1, em::bx2) + - EB(i + 1, j, k, em::bx2) + EB(i + 1, j, k + 1, em::bx2)); - c010 = INV_4 * - (EB(i - 1, j + 1, k - 1, em::bx2) + EB(i - 1, j + 1, k, em::bx2) + - EB(i, j + 1, k - 1, em::bx2) + EB(i, j + 1, k, em::bx2)); - c110 = INV_4 * - (EB(i, j + 1, k - 1, em::bx2) + EB(i, j + 1, k, em::bx2) + - EB(i + 1, j + 1, k - 1, em::bx2) + EB(i + 1, j + 1, k, em::bx2)); - c011 = INV_4 * - (EB(i - 1, j + 1, k, em::bx2) + EB(i - 1, j + 1, k + 1, em::bx2) + - EB(i, j + 1, k, em::bx2) + EB(i, j + 1, k + 1, em::bx2)); - c111 = INV_4 * - (EB(i, j + 1, k, em::bx2) + EB(i, j + 1, k + 1, em::bx2) + - EB(i + 1, j + 1, k, em::bx2) + EB(i + 1, j + 1, k + 1, em::bx2)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - b0[1] = c0 * (ONE - dx3_) + c1 * dx3_; - - // 
Bx3 - c000 = INV_4 * (EB(i - 1, j - 1, k, em::bx3) + EB(i - 1, j, k, em::bx3) + - EB(i, j - 1, k, em::bx3) + EB(i, j, k, em::bx3)); - c100 = INV_4 * (EB(i, j - 1, k, em::bx3) + EB(i, j, k, em::bx3) + - EB(i + 1, j - 1, k, em::bx3) + EB(i + 1, j, k, em::bx3)); - c001 = INV_4 * - (EB(i - 1, j - 1, k + 1, em::bx3) + EB(i - 1, j, k + 1, em::bx3) + - EB(i, j - 1, k + 1, em::bx3) + EB(i, j, k + 1, em::bx3)); - c101 = INV_4 * - (EB(i, j - 1, k + 1, em::bx3) + EB(i, j, k + 1, em::bx3) + - EB(i + 1, j - 1, k + 1, em::bx3) + EB(i + 1, j, k + 1, em::bx3)); - c010 = INV_4 * (EB(i - 1, j, k, em::bx3) + EB(i - 1, j + 1, k, em::bx3) + - EB(i, j, k, em::bx3) + EB(i, j + 1, k, em::bx3)); - c110 = INV_4 * (EB(i, j, k, em::bx3) + EB(i, j + 1, k, em::bx3) + - EB(i + 1, j, k, em::bx3) + EB(i + 1, j + 1, k, em::bx3)); - c011 = INV_4 * - (EB(i - 1, j, k + 1, em::bx3) + EB(i - 1, j + 1, k + 1, em::bx3) + - EB(i, j, k + 1, em::bx3) + EB(i, j + 1, k + 1, em::bx3)); - c111 = INV_4 * - (EB(i, j, k + 1, em::bx3) + EB(i, j + 1, k + 1, em::bx3) + - EB(i + 1, j, k + 1, em::bx3) + EB(i + 1, j + 1, k + 1, em::bx3)); - c00 = c000 * (ONE - dx1_) + c100 * dx1_; - c01 = c001 * (ONE - dx1_) + c101 * dx1_; - c10 = c010 * (ONE - dx1_) + c110 * dx1_; - c11 = c011 * (ONE - dx1_) + c111 * dx1_; - c0 = c00 * (ONE - dx2_) + c10 * dx2_; - c1 = c01 * (ONE - dx2_) + c11 * dx2_; - b0[2] = c0 * (ONE - dx3_) + c1 * dx3_; - } - - // Boundary conditions - - // template - // Inline void Pusher_kernel::boundaryConditions_x1(index_t& p) const { - // if (i1(p) < 0) { - // if (is_periodic_i1min) { - // i1(p) += ni1; - // i1_prev(p) += ni1; - // } else if (is_absorb_i1min) { - // tag(p) = ParticleTag::dead; - // } - // } else if (i1(p) >= ni1) { - // if (is_periodic_i1max) { - // i1(p) -= ni1; - // i1_prev(p) -= ni1; - // } else if (is_absorb_i1max) { - // tag(p) = ParticleTag::dead; - // } - // } - // } - - // template - // Inline void Pusher_kernel::boundaryConditions_x2(index_t& p) const { - // if (i2(p) < 0) { - // if 
(is_periodic_i2min) { - // i2(p) += ni2; - // i2_prev(p) += ni2; - // } else if (is_absorb_i2min) { - // tag(p) = ParticleTag::dead; - // } else if (is_ax_i2min) { - // i2(p) = 0; - // dx2(p) = ONE - dx2(p); - // ux1(p) = -ux1(p); - // } - // } else if (i2(p) >= ni2) { - // if (is_periodic_i2max) { - // i2(p) -= ni2; - // i2_prev(p) -= ni2; - // } else if (is_absorb_i2max) { - // tag(p) = ParticleTag::dead; - // } else if (is_ax_i2max) { - // i2(p) = ni2 - 1; - // i2_prev(p) = ni2 - 1; - // dx2(p) = ONE - dx2(p); - // ux1(p) = -ux1(p); - // } - // } - // } - - // template <> - // Inline void Pusher_kernel::boundaryConditions_x3(index_t& p) const { - // if (i3(p) < 0) { - // if (is_periodic_i3min) { - // i3(p) += ni3; - // i3_prev(p) += ni3; - // } else if (is_absorb_i3min) { - // tag(p) = ParticleTag::dead; - // } - // } else if (i3(p) >= ni3) { - // if (is_periodic_i3max) { - // i3(p) -= ni3; - // i3_prev(p) -= ni3; - // } else if (is_absorb_i3max) { - // tag(p) = ParticleTag::dead; - // } - // } - // } - - // template <> - // Inline void Pusher_kernel::boundaryConditions(index_t& p) const { - // boundaryConditions_x1(p); - // } - - // template <> - // Inline void Pusher_kernel::boundaryConditions(index_t& p) const { - // boundaryConditions_x1(p); - // boundaryConditions_x2(p); - // } - - // template <> - // Inline void Pusher_kernel::boundaryConditions(index_t& p) const { - // boundaryConditions_x1(p); - // boundaryConditions_x2(p); - // boundaryConditions_x3(p); - // } - - // External force - -#ifdef EXTERNAL_FORCE - - #ifdef MINKOWSKI_METRIC - template - Inline void Pusher_kernel::initForce(coord_t& xp, - vec_t& force_Cart) const { - coord_t xp_Ph { ZERO }; - metric.x_Code2Cart(xp, xp_Ph); - const vec_t force_Hat { pgen.ext_force_x1(time, xp_Ph), - pgen.ext_force_x2(time, xp_Ph), - pgen.ext_force_x3(time, xp_Ph) }; - metric.v3_Hat2Cart(xp, force_Hat, force_Cart); - } - #else - template - Inline void Pusher_kernel::initForce(coord_t& xp, - vec_t& force_Cart) 
const { - coord_t xp_Ph { ZERO }; - coord_t xp_Code { ZERO }; - for (short d { 0 }; d < static_cast(PrtlCoordD); ++d) { - xp_Code[d] = xp[d]; - } - metric.x_Code2Sph(xp_Code, xp_Ph); - const vec_t force_Hat { pgen.ext_force_x1(time, xp_Ph), - pgen.ext_force_x2(time, xp_Ph), - pgen.ext_force_x3(time, xp_Ph) }; - metric.v3_Hat2Cart(xp_Code, force_Hat, force_Cart); - } - #endif - - template - Inline void Pusher_kernel::forceHalfUpdate(index_t& p, - vec_t& force_Cart) const { - ux1(p) += HALF * dt * force_Cart[0]; - ux2(p) += HALF * dt * force_Cart[1]; - ux3(p) += HALF * dt * force_Cart[2]; - } - -#endif - -} // namespace ntt -#endif diff --git a/legacy/src/pic/pgen/old/debug.cpp b/legacy/src/pic/pgen/old/debug.cpp deleted file mode 100644 index 944f5a872..000000000 --- a/legacy/src/pic/pgen/old/debug.cpp +++ /dev/null @@ -1,75 +0,0 @@ -#include "wrapper.h" -#include "io/input.h" -#include "sim_params.h" -#include "meshblock/meshblock.h" -#include "particle_macros.h" - -#include "problem_generator.hpp" - -#include - -namespace ntt { - - template <> - void ProblemGenerator::UserInitFields(const SimulationParams&, - Meshblock& mblock) { - - Kokkos::parallel_for( - "UserInitFlds", mblock.rangeActiveCells(), Lambda(index_t i, index_t j) { - real_t i_ {(real_t)(static_cast(i) - N_GHOSTS)}, - j_ {(real_t)(static_cast(j) - N_GHOSTS)}; - real_t ex2_hat {0.1}, bx3_hat {1.0}; - vec_t e_cntrv, b_cntrv; - mblock.metric.v_Hat2Cntrv({i_ + HALF, j_}, {ZERO, ex2_hat, ZERO}, e_cntrv); - mblock.metric.v_Hat2Cntrv({i_ + HALF, j_ + HALF}, {ZERO, ZERO, bx3_hat}, b_cntrv); - mblock.em(i, j, em::ex1) = ZERO; - mblock.em(i, j, em::ex2) = ZERO; - mblock.em(i, j, em::ex3) = ZERO; - mblock.em(i, j, em::bx1) = ZERO; - mblock.em(i, j, em::bx2) = ZERO; - mblock.em(i, j, em::bx3) = ZERO; - }); - } - - template <> - void ProblemGenerator::UserInitParticles(const SimulationParams&, - Meshblock& mblock) { - auto& electrons = mblock.particles[0]; - auto& positrons = mblock.particles[1]; - auto 
random_pool = *(mblock.random_pool_ptr); - Kokkos::parallel_for( - "UserInitPrtls", CreateRangePolicy({0}, {1}), Lambda(index_t p) { - typename RandomNumberPool_t::generator_type rand_gen = random_pool.get_state(); - real_t rx = rand_gen.frand((real_t)(-2.0), (real_t)(2.0)); - real_t ry = rand_gen.frand((real_t)(-2.0), (real_t)(2.0)); - init_prtl_2d_XYZ(mblock, electrons, p, rx, ry, 1.0, 0.0, 0.0); - init_prtl_2d_XYZ(mblock, positrons, p, rx, ry, 1.0, 0.0, 0.0); - // init_prtl_2d_XYZ(&mblock, 2, p, 0.1, 0.12, 1.0, 0.0, 0.0); - // init_prtl_2d_XYZ(&mblock, 3, p, 0.1, 0.12, 1.0, 0.0, 0.0); - }); - electrons.setNpart(1); - positrons.setNpart(1); - // mblock.particles[2].setNpart(1); - // mblock.particles[3].setNpart(1); - } - // 1D - template <> - void ProblemGenerator::UserInitFields(const SimulationParams&, - Meshblock&) {} - template <> - void ProblemGenerator::UserInitParticles(const SimulationParams&, - Meshblock&) {} - - // 3D - template <> - void ProblemGenerator::UserInitFields(const SimulationParams&, - Meshblock&) {} - template <> - void ProblemGenerator::UserInitParticles(const SimulationParams&, - Meshblock&) {} - -} // namespace ntt - -template struct ntt::ProblemGenerator; -template struct ntt::ProblemGenerator; -template struct ntt::ProblemGenerator; diff --git a/legacy/src/pic/pgen/old/debug.hpp b/legacy/src/pic/pgen/old/debug.hpp deleted file mode 100644 index 06108ad2c..000000000 --- a/legacy/src/pic/pgen/old/debug.hpp +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef PROBLEM_GENERATOR_H -#define PROBLEM_GENERATOR_H - -#include "wrapper.h" -#include "sim_params.h" -#include "meshblock/meshblock.h" - -namespace ntt { - - template - struct ProblemGenerator { - ProblemGenerator(const SimulationParams&) {} - - void UserInitFields(const SimulationParams&, Meshblock&); - void UserInitParticles(const SimulationParams&, Meshblock&); - void UserBCFields(const real_t&, const SimulationParams&, Meshblock&); - Inline auto UserTargetField_br_hat(const Meshblock&, const 
coord_t&) const - -> real_t { - return ZERO; - } - void UserDriveParticles(const real_t&, const SimulationParams&, Meshblock&) {} - }; - -} // namespace ntt - -#endif diff --git a/legacy/src/pic/pgen/old/deposit.cpp b/legacy/src/pic/pgen/old/deposit.cpp deleted file mode 100644 index 568e68ea9..000000000 --- a/legacy/src/pic/pgen/old/deposit.cpp +++ /dev/null @@ -1,83 +0,0 @@ -#include "wrapper.h" -#include "io/input.h" -#include "sim_params.h" -#include "meshblock/meshblock.h" - -#include "problem_generator.hpp" - -#include - -namespace ntt { - - template <> - void ProblemGenerator::userInitFields( - const SimulationParams&, Meshblock& mblock) { - - Kokkos::parallel_for( - "userInitFlds", mblock.rangeActiveCells(), Lambda(index_t i, index_t j) { - real_t i_ {(real_t)(static_cast(i) - N_GHOSTS)}, - j_ {(real_t)(static_cast(j) - N_GHOSTS)}; - // real_t ex2_hat {0.1}, bx3_hat {1.0}; - // vec_t e_cntrv, b_cntrv; - // mblock.metric.v_Hat2Cntrv({i_ + HALF, j_}, {ZERO, ex2_hat, ZERO}, e_cntrv); - // mblock.metric.v_Hat2Cntrv({i_ + HALF, j_ + HALF}, {ZERO, ZERO, bx3_hat}, b_cntrv); - mblock.em(i, j, em::ex1) = ZERO; - mblock.em(i, j, em::ex2) = ZERO; // e_cntrv[1]; - mblock.em(i, j, em::ex3) = ZERO; - mblock.em(i, j, em::bx1) = ZERO; - mblock.em(i, j, em::bx2) = ZERO; - mblock.em(i, j, em::bx3) = ZERO; // b_cntrv[2]; - }); - } - - template <> - void ProblemGenerator::userInitParticles( - const SimulationParams&, Meshblock& mblock) { - - Kokkos::parallel_for( - "userInitPrtls", CreateRangePolicy({0}, {1}), Lambda(index_t p) { - coord_t x {0.0, 0.0}, x_CU; - mblock.metric.x_Cart2Code(x, x_CU); - auto [i1, dx1] = mblock.metric.CU_to_Idi(x_CU[0]); - auto [i2, dx2] = mblock.metric.CU_to_Idi(x_CU[1]); - // electron - mblock.particles[0].i1(p) = i1; - mblock.particles[0].i2(p) = i2; - mblock.particles[0].dx1(p) = dx1; - mblock.particles[0].dx2(p) = dx2; - // mblock.particles[0].ux1(p) = -2.0; - // mblock.particles[0].ux2(p) = -5.0; - mblock.particles[0].ux1(p) = 12.0; - // 
positron - mblock.particles[1].i1(p) = i1; - mblock.particles[1].i2(p) = i2; - mblock.particles[1].dx1(p) = dx1; - mblock.particles[1].dx2(p) = dx2; - // mblock.particles[0].ux1(p) = -2.0; - // mblock.particles[0].ux2(p) = 6.0; - // mblock.particles[1].ux3(p) = 0.0; - }); - mblock.particles[0].setNpart(1); - mblock.particles[1].setNpart(1); - } - // 1D - template <> - void ProblemGenerator::userInitFields( - const SimulationParams&, Meshblock&) {} - template <> - void ProblemGenerator::userInitParticles( - const SimulationParams&, Meshblock&) {} - - // 3D - template <> - void ProblemGenerator::userInitFields( - const SimulationParams&, Meshblock&) {} - template <> - void ProblemGenerator::userInitParticles( - const SimulationParams&, Meshblock&) {} - -} // namespace ntt - -template struct ntt::ProblemGenerator; -template struct ntt::ProblemGenerator; -template struct ntt::ProblemGenerator; \ No newline at end of file diff --git a/legacy/src/pic/pgen/old/deposit.hpp b/legacy/src/pic/pgen/old/deposit.hpp deleted file mode 100644 index 323f05297..000000000 --- a/legacy/src/pic/pgen/old/deposit.hpp +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef PROBLEM_GENERATOR_H -#define PROBLEM_GENERATOR_H - -#include "wrapper.h" -#include "sim_params.h" -#include "meshblock/meshblock.h" - -namespace ntt { - - template - struct ProblemGenerator { - ProblemGenerator(const SimulationParams&) {} - - void userInitFields(const SimulationParams&, Meshblock&); - void userInitParticles(const SimulationParams&, Meshblock&); - void userBCFields(const real_t&, const SimulationParams&, Meshblock&); - Inline auto userTargetField_br_hat(const Meshblock&, const coord_t&) const - -> real_t { - return ZERO; - } - }; -} // namespace ntt - -#endif diff --git a/legacy/src/pic/pgen/old/em.cpp b/legacy/src/pic/pgen/old/em.cpp deleted file mode 100644 index bb790de3c..000000000 --- a/legacy/src/pic/pgen/old/em.cpp +++ /dev/null @@ -1,109 +0,0 @@ -#include "wrapper.h" -#include "io/input.h" -#include 
"field_macros.h" -#include "sim_params.h" -#include "meshblock/meshblock.h" - -#include "problem_generator.hpp" - -namespace ntt { - - template <> - ProblemGenerator::ProblemGenerator(const SimulationParams& params) { - m_nx1 = readFromInput(params.inputdata(), "problem", "nx1", 1); - m_nx2 = readFromInput(params.inputdata(), "problem", "nx2", 1); - m_amplitude = readFromInput(params.inputdata(), "problem", "amplitude", 1.0); - } - - Inline void emWaveField(const coord_t& x_ph, - vec_t& e_out, - vec_t& b_out, - real_t ex_ampl, - real_t ey_ampl, - real_t bz_ampl, - real_t kx, - real_t ky) { - e_out[0] = ex_ampl * math::sin(kx * x_ph[0] + ky * x_ph[1]); - e_out[1] = ey_ampl * math::sin(kx * x_ph[0] + ky * x_ph[1]); - b_out[2] = bz_ampl * math::sin(kx * x_ph[0] + ky * x_ph[1]); - } - - template <> - void ProblemGenerator::UserInitParticles(const SimulationParams&, - Meshblock&) {} - - template <> - void ProblemGenerator::UserInitFields(const SimulationParams&, - Meshblock& mblock) { - - real_t sx = mblock.metric.x1_max - mblock.metric.x1_min; - real_t sy = mblock.metric.x2_max - mblock.metric.x2_min; - real_t kx = constant::TWO_PI * m_nx1 / sx; - real_t ky = constant::TWO_PI * m_nx2 / sy; - real_t ex_ampl, ey_ampl, bz_ampl = m_amplitude; - ex_ampl = -ky; - ey_ampl = kx; - ex_ampl = m_amplitude * ex_ampl / math::sqrt(ex_ampl * ex_ampl + ey_ampl * ey_ampl); - ey_ampl = m_amplitude * ey_ampl / math::sqrt(ex_ampl * ex_ampl + ey_ampl * ey_ampl); - Kokkos::parallel_for( - "userInitFlds", mblock.rangeActiveCells(), Lambda(index_t i, index_t j) { - set_em_fields_2d(mblock, i, j, emWaveField, ex_ampl, ey_ampl, bz_ampl, kx, ky); - }); - } - - template <> - void ProblemGenerator::UserDriveParticles(const real_t&, - const SimulationParams&, - Meshblock&) {} - - template <> - void ProblemGenerator::UserBCFields(const real_t&, - const SimulationParams&, - Meshblock&) {} - template <> - Inline auto ProblemGenerator::UserTargetField_br_hat( - const Meshblock&, const coord_t&) const 
-> real_t { - return ZERO; - } - - // clang-format off - @PgenPlaceholder1D@ - @PgenPlaceholder3D@ - // clang-format on - -} // namespace ntt - -template struct ntt::ProblemGenerator; -template struct ntt::ProblemGenerator; -template struct ntt::ProblemGenerator; - -// real_t ex_ampl, ey_ampl, bz_ampl {m_amplitude}; -// ex_ampl = -ky; -// ey_ampl = kx; -// ex_ampl = m_amplitude * ex_ampl / math::sqrt(ex_ampl * ex_ampl + ey_ampl * ey_ampl); -// ey_ampl = m_amplitude * ey_ampl / math::sqrt(ex_ampl * ex_ampl + ey_ampl * ey_ampl); -// Kokkos::parallel_for( -// "userInitFlds", mblock.rangeActiveCells(), Lambda(index_t i, index_t j) { -// // index to code units -// real_t i_ {(real_t)(static_cast(i) - N_GHOSTS)}, -// j_ {(real_t)(static_cast(j) - N_GHOSTS)}; - -// // code units to cartesian (physical units) -// coord_t xy_, xy_half; -// mblock.metric.x_Code2Cart({i_, j_}, xy_); -// mblock.metric.x_Code2Cart({i_ + HALF, j_ + HALF}, xy_half); - -// // hatted fields -// real_t ex_hat {ex_ampl * math::sin(kx * xy_half[0] + ky * xy_[1])}; -// real_t ey_hat {ey_ampl * math::sin(kx * xy_[0] + ky * xy_half[1])}; -// real_t bz_hat {bz_ampl * math::sin(kx * xy_half[0] + ky * xy_half[1])}; - -// vec_t ex_cntr, ey_cntr, bz_cntr; -// mblock.metric.v_Hat2Cntrv({i_ + HALF, j_}, {ex_hat, ZERO, ZERO}, ex_cntr); -// mblock.metric.v_Hat2Cntrv({i_, j_ + HALF}, {ZERO, ey_hat, ZERO}, ey_cntr); -// mblock.metric.v_Hat2Cntrv({i_ + HALF, j_ + HALF}, {ZERO, ZERO, bz_hat}, bz_cntr); - -// mblock.em(i, j, em::ex1) = ex_cntr[0]; -// mblock.em(i, j, em::ex2) = ey_cntr[1]; -// mblock.em(i, j, em::bx3) = bz_cntr[2]; -// }); diff --git a/legacy/src/pic/pgen/old/em.hpp b/legacy/src/pic/pgen/old/em.hpp deleted file mode 100644 index a1db5785d..000000000 --- a/legacy/src/pic/pgen/old/em.hpp +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef PROBLEM_GENERATOR_H -#define PROBLEM_GENERATOR_H - -#include "wrapper.h" -#include "sim_params.h" -#include "meshblock/meshblock.h" - -namespace ntt { - - template - struct 
ProblemGenerator { - ProblemGenerator(const SimulationParams&); - - void UserInitFields(const SimulationParams&, Meshblock&); - void UserInitParticles(const SimulationParams&, Meshblock&); - void UserBCFields(const real_t&, const SimulationParams&, Meshblock&); - Inline auto UserTargetField_br_hat(const Meshblock&, const coord_t&) const - -> real_t; - void UserDriveParticles(const real_t&, const SimulationParams&, Meshblock&); - - private: - int m_nx1, m_nx2; - real_t m_amplitude; - }; - -} // namespace ntt - -#endif \ No newline at end of file diff --git a/legacy/src/pic/pgen/old/magnetosphere.hpp b/legacy/src/pic/pgen/old/magnetosphere.hpp deleted file mode 100644 index 15aedc28e..000000000 --- a/legacy/src/pic/pgen/old/magnetosphere.hpp +++ /dev/null @@ -1,220 +0,0 @@ - -#ifndef PROBLEM_GENERATOR_H -#define PROBLEM_GENERATOR_H - -#include "wrapper.h" - -#include "field_macros.h" -#include "sim_params.h" - -#include "meshblock/meshblock.h" - -#include "utils/archetypes.hpp" -#include "utils/injector.hpp" - -namespace ntt { - enum FieldMode { MonopoleField = 1, DipoleField = 2 }; - - template - struct ProblemGenerator : public PGen { - inline ProblemGenerator(const SimulationParams& params) - : r_surf { params.get("problem", "r_surf", (real_t)(1.0)) }, - b_surf { params.get("problem", "b_surf", (real_t)(1.0)) }, - spin_omega { params.get("problem", "spin_omega") }, - spinup_time { params.get("problem", "spinup_time", 0.0) }, - inj_fraction { params.get("problem", "inj_fraction", (real_t)(0.1)) }, - field_mode { params.get("problem", "field_mode", 2) == 2 ? 
DipoleField - : MonopoleField }, - inj_rmax { params.get("problem", "inj_rmax", (real_t)(1.5)) } {} - - inline void UserInitFields(const SimulationParams&, Meshblock&) override {} - inline void UserDriveFields(const real_t&, - const SimulationParams&, - Meshblock&) override {} - inline void UserDriveParticles(const real_t&, - const SimulationParams&, - Meshblock&) override {} - - private: - const real_t r_surf, b_surf, spin_omega, spinup_time, inj_fraction, inj_rmax; - const FieldMode field_mode; - }; - - Inline void mainBField(const coord_t& x_ph, - vec_t&, - vec_t& b_out, - real_t _rsurf, - real_t _bsurf, - int _mode) { - if (_mode == 2) { - b_out[0] = _bsurf * math::cos(x_ph[1]) / CUBE(x_ph[0] / _rsurf); - b_out[1] = _bsurf * HALF * math::sin(x_ph[1]) / CUBE(x_ph[0] / _rsurf); - b_out[2] = ZERO; - } else { - b_out[0] = _bsurf * SQR(_rsurf / x_ph[0]); - b_out[1] = ZERO; - b_out[2] = ZERO; - } - } - - Inline void surfaceRotationField(const coord_t& x_ph, - vec_t& e_out, - vec_t& b_out, - real_t _rsurf, - real_t _bsurf, - int _mode, - real_t _omega) { - mainBField(x_ph, e_out, b_out, _rsurf, _bsurf, _mode); - e_out[0] = _omega * b_out[1] * x_ph[0] * math::sin(x_ph[1]); - e_out[1] = -_omega * b_out[0] * x_ph[0] * math::sin(x_ph[1]); - e_out[2] = 0.0; - } - - template <> - inline void ProblemGenerator::UserInitFields( - const SimulationParams& params, Meshblock& mblock) { - const auto _rsurf = r_surf; - { - const auto _rmin = mblock.metric.x1_min; - coord_t x_ph { r_surf, ZERO }; - coord_t xi { ZERO }; - mblock.metric.x_Sph2Code(x_ph, xi); - NTTHostErrorIf(_rmin >= _rsurf, "rmin > r_surf"); - NTTHostErrorIf(xi[0] < params.currentFilters(), "r_surf - rmin < filters"); - } - const auto _bsurf = b_surf; - const int _mode = field_mode; - Kokkos::parallel_for( - "UserInitFields", mblock.rangeActiveCells(), Lambda(index_t i, index_t j) { - set_em_fields_2d(mblock, i, j, mainBField, _rsurf, _bsurf, _mode); - }); - } - - template <> - inline void 
ProblemGenerator::UserDriveFields( - const real_t& time, const SimulationParams&, Meshblock& mblock) { - { - coord_t x_ph { r_surf, ZERO }; - coord_t xi { ZERO }; - mblock.metric.x_Sph2Code(x_ph, xi); - const auto i1_surf = (unsigned int)(xi[0] + N_GHOSTS); - const auto _mode = field_mode; - const auto _rsurf = r_surf; - const auto _bsurf = b_surf; - const auto i1_min = mblock.i1_min(); - const auto _omega - = (time < spinup_time) ? (time / spinup_time) * spin_omega : spin_omega; - - Kokkos::parallel_for( - "UserDriveFields_rmin", - CreateRangePolicy({ i1_min, mblock.i2_min() }, { i1_surf, mblock.i2_max() }), - Lambda(index_t i1, index_t i2) { - set_ex2_2d(mblock, i1, i2, surfaceRotationField, _rsurf, _bsurf, _mode, _omega); - set_ex3_2d(mblock, i1, i2, surfaceRotationField, _rsurf, _bsurf, _mode, _omega); - set_bx1_2d(mblock, i1, i2, surfaceRotationField, _rsurf, _bsurf, _mode, _omega); - if (i1 < i1_surf - 1) { - set_ex1_2d(mblock, i1, i2, surfaceRotationField, _rsurf, _bsurf, _mode, _omega); - set_bx2_2d(mblock, i1, i2, surfaceRotationField, _rsurf, _bsurf, _mode, _omega); - set_bx3_2d(mblock, i1, i2, surfaceRotationField, _rsurf, _bsurf, _mode, _omega); - } - }); - } - } - - template - struct PgenTargetFields : public TargetFields { - PgenTargetFields(const SimulationParams& params, const Meshblock& mblock) - : TargetFields(params, mblock), - _rsurf { params.get("problem", "r_surf", (real_t)(1.0)) }, - _bsurf { params.get("problem", "b_surf", (real_t)(1.0)) }, - _mode { params.get("problem", "field_mode", 2) } {} - Inline real_t operator()(const em& comp, const coord_t& xi) const override { - if ((comp == em::bx1) || (comp == em::bx2)) { - vec_t e_out { ZERO }, b_out { ZERO }; - coord_t x_ph { ZERO }; - (this->m_mblock).metric.x_Code2Sph(xi, x_ph); - mainBField(x_ph, e_out, b_out, _rsurf, _bsurf, _mode); - return (comp == em::bx1) ? 
b_out[0] : b_out[1]; - } else { - return ZERO; - } - } - - private: - const real_t _rsurf, _bsurf; - const int _mode; - }; - - template - struct RadialKick : public EnergyDistribution { - RadialKick(const SimulationParams& params, const Meshblock& mblock) - : EnergyDistribution(params, mblock), - u_kick { params.get("problem", "u_kick", ZERO) } {} - Inline void operator()(const coord_t&, vec_t& v, const int&) const override { - v[0] = u_kick; - } - - private: - const real_t u_kick; - }; - - template - struct InjectionShell : public SpatialDistribution { - explicit InjectionShell(const SimulationParams& params, Meshblock& mblock) - : SpatialDistribution(params, mblock), - _inj_rmin { params.get("problem", "r_surf", (real_t)(1.0)) }, - _inj_rmax { params.get("problem", "inj_rmax", (real_t)(1.5)) } { - NTTHostErrorIf(_inj_rmin >= _inj_rmax, "inj_rmin >= inj_rmax"); - } - Inline real_t operator()(const coord_t& x_ph) const { - return ((x_ph[0] <= _inj_rmax) && (x_ph[0] > _inj_rmin)) ? ONE : ZERO; - } - - private: - const real_t _inj_rmin, _inj_rmax; - }; - - template - struct MaxDensCrit : public InjectionCriterion { - explicit MaxDensCrit(const SimulationParams& params, Meshblock& mblock) - : InjectionCriterion(params, mblock), - _inj_maxdens { params.get("problem", "inj_maxdens", (real_t)(5.0)) } {} - Inline bool operator()(const coord_t&) const { - return true; - } - - private: - const real_t _inj_maxdens; - }; - - template <> - Inline bool MaxDensCrit::operator()(const coord_t& xph) const { - coord_t xi { ZERO }; - (this->m_mblock).metric.x_Sph2Code(xph, xi); - auto i1 = (std::size_t)(xi[0]) + N_GHOSTS; - auto i2 = (std::size_t)(xi[1]) + N_GHOSTS; - if (i1 < (this->m_mblock).buff.extent(0) && i2 < (this->m_mblock).buff.extent(1)) { - // return true; - return (this->m_mblock).buff(i1, i2, 2) < _inj_maxdens; - } else { - return false; - } - } - - template <> - inline void ProblemGenerator::UserDriveParticles( - const real_t&, const SimulationParams& params, 
Meshblock& mblock) { - mblock.ComputeMoments(params, FieldID::Rho, {}, { 1, 2 }, 2, 0); - WaitAndSynchronize(); - auto nppc_per_spec = (real_t)(params.ppc0()) * inj_fraction * HALF; - InjectInVolume( - params, - mblock, - { 1, 2 }, - nppc_per_spec, - { mblock.metric.x1_min, inj_rmax, mblock.metric.x2_min, mblock.metric.x2_max }); - } - -} // namespace ntt - -#endif \ No newline at end of file diff --git a/legacy/src/pic/pgen/old/oneprtl.cpp b/legacy/src/pic/pgen/old/oneprtl.cpp deleted file mode 100644 index d4f5e8d00..000000000 --- a/legacy/src/pic/pgen/old/oneprtl.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "wrapper.h" -#include "io/input.h" -#include "sim_params.h" -#include "meshblock/meshblock.h" -#include "particle_macros.h" - -#include "problem_generator.hpp" - -#include - -namespace ntt { - - template <> - ProblemGenerator::ProblemGenerator(const SimulationParams&) {} - - template <> - void ProblemGenerator::UserInitFields(const SimulationParams&, - Meshblock& mblock) { - Kokkos::parallel_for( - "UserInitFlds", mblock.rangeActiveCells(), Lambda(index_t i, index_t j) { - real_t i_ {(real_t)(static_cast(i) - N_GHOSTS)}, - j_ {(real_t)(static_cast(j) - N_GHOSTS)}; - // real_t ex2_hat {0.1}, bx3_hat {1.0}; - vec_t e_cntrv; - mblock.metric.v_Hat2Cntrv({i_, j_}, {1e-4, ZERO, ZERO}, e_cntrv); - mblock.em(i, j, em::ex1) = e_cntrv[0]; - // mblock.em(i, j, em::ex2) = e_cntrv[1]; - // mblock.em(i, j, em::ex3) = ZERO; - // mblock.em(i, j, em::bx1) = ZERO; - // mblock.em(i, j, em::bx2) = ZERO; - // mblock.em(i, j, em::bx3) = b_cntrv[2]; - }); - } - - template <> - void ProblemGenerator::UserInitParticles(const SimulationParams&, - Meshblock& mblock) { - auto& electrons = mblock.particles[0]; - // auto& positrons = mblock.particles[1]; - electrons.setNpart(1); - // positrons.setNpart(1); - Kokkos::parallel_for( - "UserInitPrtls", CreateRangePolicy({0}, {1}), Lambda(index_t p) { - real_t rx = 0.0, ry = 0.0; - init_prtl_2d_XYZ(mblock, electrons, p, rx, ry, 0.0, 0.0, 0.0); 
- // init_prtl_2d_XYZ(mblock, positrons, p, rx, ry, 1.0, 0.0, 0.0); - }); - } - - template <> - void ProblemGenerator::UserDriveParticles(const real_t&, - const SimulationParams&, - Meshblock&) {} - - template <> - void ProblemGenerator::UserBCFields(const real_t&, - const SimulationParams&, - Meshblock&) {} - template <> - Inline auto ProblemGenerator::UserTargetField_br_hat( - const Meshblock&, const coord_t&) const -> real_t { - return ZERO; - } - - // clang-format off - @PgenPlaceholder1D@ - @PgenPlaceholder3D@ - // clang-format on - -} // namespace ntt - -template struct ntt::ProblemGenerator; -template struct ntt::ProblemGenerator; -template struct ntt::ProblemGenerator; diff --git a/legacy/src/pic/pgen/old/oneprtl.hpp b/legacy/src/pic/pgen/old/oneprtl.hpp deleted file mode 100644 index 15e92b2a3..000000000 --- a/legacy/src/pic/pgen/old/oneprtl.hpp +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef PROBLEM_GENERATOR_H -#define PROBLEM_GENERATOR_H - -#include "wrapper.h" -#include "sim_params.h" -#include "meshblock/meshblock.h" - -namespace ntt { - - template - struct ProblemGenerator { - ProblemGenerator(const SimulationParams&); - - void UserInitFields(const SimulationParams&, Meshblock&); - void UserInitParticles(const SimulationParams&, Meshblock&); - void UserBCFields(const real_t&, const SimulationParams&, Meshblock&); - Inline auto UserTargetField_br_hat(const Meshblock&, const coord_t&) const - -> real_t; - void UserDriveParticles(const real_t&, const SimulationParams&, Meshblock&); - }; - -} // namespace ntt - -#endif diff --git a/legacy/src/pic/pgen/old/oneprtl_sph.cpp b/legacy/src/pic/pgen/old/oneprtl_sph.cpp deleted file mode 100644 index 810145a46..000000000 --- a/legacy/src/pic/pgen/old/oneprtl_sph.cpp +++ /dev/null @@ -1,153 +0,0 @@ -#include "wrapper.h" -#include "io/input.h" -#include "sim_params.h" -#include "meshblock/meshblock.h" -#include "particle_macros.h" - -#include "problem_generator.hpp" - -#include - -namespace ntt { - - template <> - void 
ProblemGenerator::UserInitFields(const SimulationParams&, - Meshblock& mblock) { - - Kokkos::parallel_for( - "UserInitFlds", mblock.rangeActiveCells(), Lambda(index_t i, index_t j) { - mblock.em(i, j, em::bx1) = ZERO; - mblock.em(i, j, em::bx2) = ZERO; - }); - } - - template <> - void ProblemGenerator::UserBCFields(const real_t&, - const SimulationParams&, - Meshblock& mblock) { - Kokkos::parallel_for( - "2d_bc_rmin", - CreateRangePolicy({N_GHOSTS, 0}, {N_GHOSTS + 2, mblock.i2_max() + N_GHOSTS}), - Lambda(index_t i, index_t j) { - mblock.em(i, j, em::bx1) = ZERO; - mblock.em(i, j, em::ex2) = ZERO; - mblock.em(i, j, em::ex3) = ZERO; - }); - } - - template <> - void ProblemGenerator::UserInitParticles(const SimulationParams&, - Meshblock& mblock) { - auto& electrons = mblock.particles[0]; - auto& positrons = mblock.particles[1]; - Kokkos::parallel_for( - "UserInitPrtls", CreateRangePolicy({0}, {1}), Lambda(index_t p) { - init_prtl_2d_Sph(mblock, electrons, p, 3.0, ntt::constant::PI * 0.002, 0.0, 0.0, 0.0); - init_prtl_2d_Sph(mblock, positrons, p, 3.0, ntt::constant::PI * 0.002, 0.0, 0.0, 0.0); - }); - mblock.particles[0].setNpart(1); - mblock.particles[1].setNpart(1); - } - - template <> - void ProblemGenerator::UserDriveParticles(const real_t& t, - const SimulationParams&, - Meshblock& mblock) { - real_t dt = mblock.timestep(); - if (t < 400 * dt) { - auto electron = mblock.particles[0]; - Kokkos::parallel_for( - "UserDrivePrtls", CreateRangePolicy({0}, {1}), Lambda(index_t p) { - real_t vel = 2.0 * (math::tanh((t - 350.0 * dt) / (100.0 * dt)) + 1.0) / 2.0; - electron.ux1(p) = vel * math::sin(constant::PI * 0.25); - electron.ux3(p) = vel * math::cos(constant::PI * 0.25); - }); - } - } - - // 1D - template <> - void ProblemGenerator::UserInitFields(const SimulationParams&, - Meshblock&) {} - template <> - void ProblemGenerator::UserInitParticles(const SimulationParams&, - Meshblock&) {} - template <> - void ProblemGenerator::UserBCFields(const real_t&, - const 
SimulationParams&, - Meshblock&) {} - template <> - void ProblemGenerator::UserDriveParticles(const real_t&, - const SimulationParams&, - Meshblock&) {} - - // 3D - template <> - void ProblemGenerator::UserInitFields(const SimulationParams&, - Meshblock&) {} - template <> - void ProblemGenerator::UserInitParticles(const SimulationParams&, - Meshblock&) {} - template <> - void ProblemGenerator::UserBCFields(const real_t&, - const SimulationParams&, - Meshblock&) {} - template <> - void ProblemGenerator::UserDriveParticles(const real_t&, - const SimulationParams&, - Meshblock&) {} - -} // namespace ntt - -template struct ntt::ProblemGenerator; -template struct ntt::ProblemGenerator; -template struct ntt::ProblemGenerator; - -// real_t i_ {(real_t)(static_cast(i) - N_GHOSTS)}; -// real_t j_ {(real_t)(static_cast(j) - N_GHOSTS)}; -// real_t r_min {mblock.metric.x1_min}; -// coord_t rth_; -// // dipole -// real_t br, btheta; -// // Br -// mblock.metric.x_Code2Sph({i_, j_ + HALF}, rth_); -// br = TWO * math::cos(rth_[1]) / CUBE(rth_[0] / r_min); -// // Btheta -// mblock.metric.x_Code2Sph({i_ + HALF, j_}, rth_); -// btheta = math::sin(rth_[1]) / CUBE(rth_[0] / r_min); - -// vec_t b_cntrv; -// // @comment not quite true (need to separate for each component) -// mblock.metric.v_Hat2Cntrv({i_ + HALF, j_ + HALF}, {br, btheta, ZERO}, b_cntrv); -// mblock.em(i, j, em::bx1) = b_cntrv[0]; -// mblock.em(i, j, em::bx2) = b_cntrv[1]; - -// rotating monopole -// real_t br, bphi, etheta; -//// Etheta -// mblock.metric.x_Code2Sph({i_, j_ + HALF}, rth_); -// etheta = -0.05 * (r_min / rth_[0]) * math::sin(rth_[1]); - -// vec_t cntrv; -// mblock.metric.v_Hat2Cntrv({i_, j_ + HALF}, {ZERO, etheta, ZERO}, cntrv); -// mblock.em(i, j, em::ex2) = cntrv[1]; - -//// Br -// mblock.metric.x_Code2Sph({i_, j_ + HALF}, rth_); -// br = SQR(r_min / rth_[0]); - -// mblock.metric.x_Code2Sph({i_, j_ + HALF}, rth_); -// bphi = -0.05 * (r_min / rth_[0]) * math::sin(rth_[1]); - -// 
mblock.metric.v_Hat2Cntrv({i_, j_ + HALF}, {br, ZERO, bphi}, cntrv); -// mblock.em(i, j, em::bx1) = cntrv[0]; - -//// Bphi -// mblock.metric.x_Code2Sph({i_ + HALF, j_ + HALF}, rth_); -// br = SQR(r_min / rth_[0]); - -// mblock.metric.x_Code2Sph({i_ + HALF, j_ + HALF}, rth_); -// bphi = -0.05 * (r_min / rth_[0]) * math::sin(rth_[1]); - -// mblock.metric.v_Hat2Cntrv({i_ + HALF, j_ + HALF}, {br, ZERO, bphi}, cntrv); -// mblock.em(i, j, em::bx3) = cntrv[2]; \ No newline at end of file diff --git a/legacy/src/pic/pgen/old/oneprtl_sph.hpp b/legacy/src/pic/pgen/old/oneprtl_sph.hpp deleted file mode 100644 index 720822347..000000000 --- a/legacy/src/pic/pgen/old/oneprtl_sph.hpp +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef PROBLEM_GENERATOR_H -#define PROBLEM_GENERATOR_H - -#include "wrapper.h" -#include "sim_params.h" -#include "meshblock/meshblock.h" - -namespace ntt { - - template - struct ProblemGenerator { - ProblemGenerator(const SimulationParams&) {} - - void UserInitFields(const SimulationParams&, Meshblock&); - void UserInitParticles(const SimulationParams&, Meshblock&); - void UserBCFields(const real_t&, const SimulationParams&, Meshblock&); - Inline auto UserTargetField_br_hat(const Meshblock&, const coord_t&) const - -> real_t { - return ZERO; - } - void UserDriveParticles(const real_t&, const SimulationParams&, Meshblock&); - }; - -} // namespace ntt - -#endif diff --git a/legacy/tests/TODO_CMakeLists.txt b/legacy/tests/TODO_CMakeLists.txt deleted file mode 100644 index 3e33a5809..000000000 --- a/legacy/tests/TODO_CMakeLists.txt +++ /dev/null @@ -1,263 +0,0 @@ -# include(CTest) - -set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../src) - -add_subdirectory(${SOURCE_DIR}/global ${CMAKE_CURRENT_BINARY_DIR}/global) -add_subdirectory(${SOURCE_DIR}/kernels ${CMAKE_CURRENT_BINARY_DIR}/kernels) - -set(title "kernels-new") -set(exec test-${title}.xc) -set(src ${title}.cpp) -add_executable(${exec} ${src}) - -target_link_libraries(${exec} ntt_global ntt_kernels) - -# add_test(NAME 
"Metadomain: ${metric}" COMMAND "test-${title}.xc") - -# # include main source directory for all targets -# include_directories(${SOURCE_DIR}) - -# # --------------------------------- Wrapper -------------------------------- # -# set(WRAPPER ${PROJECT_NAME}-wrapper) -# add_library(${WRAPPER} STATIC ${SOURCE_DIR}/wrapper/kokkos.cpp) - -# # link wrapper with all targets -# link_libraries(${WRAPPER}) - -# # include wrapper header for all targets -# include_directories(${SOURCE_DIR}/wrapper) - -# # -------------------------- Framework & Engines --------------------------- # - -# # include framework headers for all targets -# include_directories(${SOURCE_DIR}/framework) - -# set(all_metrics ${sr_metrics} ${gr_metrics}) - -# # Libraries for all metrics and engines compile framework for all metrics and -# # engines + all engines with corresponding metrics -# function(add_framework_library metric) -# string(TOUPPER ${metric} metric_upper) -# file(GLOB_RECURSE FRAMEWORK_FILES ${SOURCE_DIR}/framework/*.cpp) -# add_library(framework-${metric} STATIC EXCLUDE_FROM_ALL ${FRAMEWORK_FILES}) -# target_compile_options( -# framework-${metric} -# PUBLIC -D${metric_upper}_METRIC -DSIMULATION_METRIC=\"${metric}\" -# -DMETRIC_HEADER=\"metrics/${metric}.h\") -# endfunction() - -# function(add_engine_library metric engine pgen) -# if(${pgen} STREQUAL "dummy") -# set(pgen_full "dummy") -# else() -# if(engine STREQUAL "pic") -# if(metric STREQUAL "minkowski") -# set(pgen_full "srpic-cart/${pgen}") -# else() -# set(pgen_full "srpic-axisym/${pgen}") -# endif() -# else() -# set(pgen_full "grpic-axisym/${pgen}") -# endif() -# endif() - -# string(TOUPPER ${metric} metric_upper) -# string(TOUPPER ${engine} engine_upper) -# set(title engine-${engine}-${metric}) -# set(pgen_name ${pgen}) - -# if(NOT ${pgen} STREQUAL "dummy") -# string(REPLACE "/" "_" pgen_name ${pgen}) -# endif() - -# set(title ${title}-${pgen_name}) -# file(GLOB_RECURSE ${engine_upper}_FILES ${SOURCE_DIR}/engines/${engine}/*.cpp) 
-# add_library(${title} STATIC EXCLUDE_FROM_ALL ${${engine_upper}_FILES}) -# target_compile_options( -# ${title} -# PUBLIC -D${metric_upper}_METRIC -D${engine_upper}_ENGINE -# -DSIMULATION_METRIC=\"${metric}\" -# -DMETRIC_HEADER=\"metrics/${metric}.h\") -# target_compile_options(${title} PUBLIC "-DPGEN_HEADER=\"../setups/${pgen_full}.hpp\"") -# target_include_directories(${title} PRIVATE ${SOURCE_DIR}/engines -# ${SOURCE_DIR}/engines/${engine}) -# endfunction() - -# foreach(metric ${all_metrics}) -# list(FIND sr_metrics ${metric} sr_metric_index) - -# if(NOT ${sr_metric_index} EQUAL -1) -# set(engine pic) -# else() -# set(engine grpic) -# endif() - -# string(TOUPPER ${metric} metric_upper) -# string(TOUPPER ${engine} engine_upper) - -# add_framework_library(${metric}) -# add_engine_library(${metric} ${engine} dummy) -# add_engine_library(${metric} sandbox dummy) -# endforeach() - -# # ---------------------------------- Tests --------------------------------- # -# enable_testing() - -# # --------------------------------- Utils ---------------------------------- # -# foreach(metric ${all_metrics}) -# set(title utils-metadomain-${metric}) -# add_executable(test-${title}.xc utils-metadomain.cpp) -# target_link_libraries(test-${title}.xc PUBLIC framework-${metric}) -# add_test(NAME "Metadomain: ${metric}" COMMAND "test-${title}.xc") - -# if(${output}) -# set(title utils-writer-${metric}) -# set(engine sandbox) -# add_executable(test-${title}.xc utils-writer.cpp) -# target_link_libraries(test-${title}.xc PUBLIC framework-${metric} -# engine-${engine}-${metric}) -# target_include_directories(test-${title}.xc -# PRIVATE ${SOURCE_DIR}/engines/${engine}) -# add_test(NAME "Writer: ${metric}" COMMAND "test-${title}.xc") -# endif() - -# if(${mpi}) -# set(title utils-comm-${metric}) - -# list(FIND sr_metrics ${metric} sr_metric_index) - -# if(NOT ${sr_metric_index} EQUAL -1) -# set(engine pic) -# else() -# set(engine grpic) -# endif() - -# add_executable(test-${title}.xc 
utils-comm.cpp) -# target_link_libraries(test-${title}.xc PUBLIC framework-${metric} -# engine-${engine}-${metric}) -# target_include_directories(test-${title}.xc -# PRIVATE ${SOURCE_DIR}/engines/${engine}) -# add_test(NAME "Comm: ${metric}" COMMAND "test-${title}.xc") -# endif() -# endforeach() - -# # --------------------------------- Metrics -------------------------------- # -# foreach(metric ${all_metrics}) -# list(FIND sr_metrics ${metric} sr_metric_index) - -# if(NOT ${sr_metric_index} EQUAL -1) -# if(${metric} STREQUAL "minkowski") -# set(filename_comp metric-comp-sr-mink.cpp) -# else() -# set(filename_comp metric-comp-sr-sph.cpp) -# endif() - -# else() -# set(filename_comp metric-comp-gr.cpp) -# endif() - -# set(title metric-trans-${metric}) -# add_executable(test-${title}.xc metric-trans.cpp) -# target_link_libraries(test-${title}.xc PUBLIC framework-${metric}) - -# add_test(NAME "Vector/Coordinate Transformations: ${metric}" -# COMMAND "test-${title}.xc") - -# set(title metric-comp-${metric}) -# add_executable(test-${title}.xc ${filename_comp}) -# target_link_libraries(test-${title}.xc PUBLIC framework-${metric}) - -# add_test(NAME "Metric Components: ${metric}" COMMAND "test-${title}.xc") -# endforeach() - -# if(${mpi} STREQUAL "OFF") -# # --------------------------------- Pusher --------------------------------- # -# set(metric minkowski) -# set(engine pic) -# set(pgen dummy) -# set(title pusher-sr-minkowski) -# add_executable(test-${title}.xc pusher-sr-mink.cpp) -# target_link_libraries(test-${title}.xc PUBLIC framework-${metric} -# engine-${engine}-${metric}-${pgen}) -# target_include_directories(test-${title}.xc -# PRIVATE ${SOURCE_DIR}/engines/${engine}) -# add_test(NAME "Pusher: ${engine} ${metric}" COMMAND "test-${title}.xc") - -# # --------------------------------- Deposit -------------------------------- # -# foreach(metric ${all_metrics}) -# set(metric ${metric}) - -# list(FIND sr_metrics ${metric} sr_metric_index) - -# if(NOT 
${sr_metric_index} EQUAL -1) -# set(engine pic) -# else() -# set(engine grpic) -# endif() - -# set(pgen dummy) -# set(title deposit-${metric}) -# add_executable(test-${title}.xc deposit.cpp) -# target_link_libraries( -# test-${title}.xc PUBLIC framework-${metric} -# engine-${engine}-${metric}-${pgen}) -# target_include_directories(test-${title}.xc -# PRIVATE ${SOURCE_DIR}/engines/${engine}) -# add_test(NAME "Deposit: ${engine} ${metric}" COMMAND "test-${title}.xc") -# endforeach() -# endif() - -# # --------------------------------- Kernel unit tests -------------------------------- -# foreach(metric ${all_metrics}) -# set(metric ${metric}) - -# list(FIND sr_metrics ${metric} sr_metric_index) - -# if(NOT ${sr_metric_index} EQUAL -1) -# set(engine pic) -# else() -# set(engine grpic) -# endif() - -# set(pgen dummy) -# set(title kernels-${metric}) -# add_executable(test-${title}.xc kernels.cpp) -# target_link_libraries( -# test-${title}.xc PUBLIC framework-${metric} -# engine-${engine}-${metric}-${pgen}) -# target_include_directories(test-${title}.xc -# PRIVATE ${SOURCE_DIR}/engines/${engine}) -# add_test(NAME "Kernels: ${engine} ${metric}" COMMAND "test-${title}.xc") -# endforeach() - -# foreach(metric ${sr_metrics}) -# set(metric ${metric}) - -# set(engine pic) - -# set(pgen dummy) -# set(title kernels-sr-${metric}) -# add_executable(test-${title}.xc kernels-sr.cpp) -# target_link_libraries( -# test-${title}.xc PUBLIC framework-${metric} -# engine-${engine}-${metric}-${pgen}) -# target_include_directories(test-${title}.xc -# PRIVATE ${SOURCE_DIR}/engines/${engine}) -# add_test(NAME "Kernels SR: ${engine} ${metric}" COMMAND "test-${title}.xc") -# endforeach() - -# foreach(metric ${gr_metrics}) -# set(metric ${metric}) - -# set(engine grpic) - -# set(pgen dummy) -# set(title kernels-gr-${metric}) -# add_executable(test-${title}.xc kernels-gr.cpp) -# target_link_libraries( -# test-${title}.xc PUBLIC framework-${metric} -# engine-${engine}-${metric}-${pgen}) -# 
target_include_directories(test-${title}.xc -# PRIVATE ${SOURCE_DIR}/engines/${engine}) -# add_test(NAME "Kernels GR: ${engine} ${metric}" COMMAND "test-${title}.xc") -# endforeach() diff --git a/legacy/tests/deposit.cpp b/legacy/tests/deposit.cpp deleted file mode 100644 index 08fcbec1c..000000000 --- a/legacy/tests/deposit.cpp +++ /dev/null @@ -1,258 +0,0 @@ -#include "wrapper.h" - -#if defined(PIC_ENGINE) - #include "pic.h" -template -using SimEngine = ntt::PIC; -#else // GRPIC_ENGINE - #include "grpic.h" -template -using SimEngine = ntt::GRPIC; -#endif - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -auto main(int argc, char* argv[]) -> int { - ntt::GlobalInitialize(argc, argv); - try { - const auto simname = "Deposit-" + std::string(SIMULATION_METRIC); - - const real_t x1_c = 128.5; - const real_t x2_c = 128.5; - const real_t r = 0.4; - real_t omega = 10.0; - - const auto inputdata = toml::table { - { "simulation", - { - { "title", simname }, - { "runtime", 2.0 * ntt::constant::TWO_PI / omega }, - } }, - { "domain", - { - { "resolution", { 256, 256 } }, -#ifdef MINKOWSKI_METRIC - { "extent", { 1.0, 10.0, 1.0, 10.0 } }, -#else - { "extent", { 1.0, 10.0 } }, - { "qsph_r0", 0.0 }, - { "qsph_h", 0.0 }, -#endif - { - "boundaries", - { -#ifdef MINKOWSKI_METRIC - toml::array { "PERIODIC" }, - toml::array { "PERIODIC" }, -#elif defined(PIC_ENGINE) - toml::array { "CUSTOM", "ABSORB" }, - toml::array { "AXIS" }, -#else - toml::array { "OPEN", "ABSORB" }, - toml::array { "AXIS" }, -#endif - }, - }, - } }, - { "units", - { - { "ppc0", 1.0 }, - { "larmor0", 1.0 }, - { "skindepth0", 1.0 }, - } }, - { "particles", - { - { "n_species", 1 }, - } }, - { "algorithm", - { - { "CFL", 0.9 }, - { "current_filters", 0 }, - } }, - { "species_1", - { - { "mass", 1.0 }, - { "charge", -1.0 }, - { "maxnpart", 1e2 }, - } }, - }; - - SimEngine sim(inputdata); - sim.ResetSimulation(); - - { - auto& mblock = sim.meshblock; - auto& 
electrons = mblock.particles[0]; - - const auto x1 = x1_c + r; - const auto x2 = x2_c; - const auto ux1 = ZERO; - const auto ux2 = r * omega; - - Kokkos::parallel_for( - "InitParticle", - 1, - Lambda(ntt::index_t p) { - electrons.i1(p) = (int)x1; - electrons.i2(p) = (int)x2; - electrons.dx1(p) = x1 - math::floor(x1); - electrons.dx2(p) = x2 - math::floor(x2); - - ntt::vec_t u { ZERO }; -#if defined(MINKOWSKI_METRIC) - mblock.metric.v3_Hat2Cart({ x1, x2 }, { ux1, ux2, ZERO }, u); -#elif defined(GRPIC_ENGINE) - mblock.metric.v3_Hat2Cov({ x1, x2 }, { ux1, ux2, ZERO }, u); -#else - mblock.metric.v3_Hat2Cart({ x1, x2, ZERO }, { ux1, ux2, ZERO }, u); -#endif - electrons.ux1(p) = u[0]; - electrons.ux2(p) = u[1]; - electrons.ux3(p) = u[2]; - electrons.tag(p) = ntt::ParticleTag::alive; - }); - electrons.setNpart(1); - } - - sim.PrintDetails(); - sim.Verify(); - sim.InitialStep(); - - auto& mblock = sim.meshblock; - auto& electrons = mblock.particles[0]; - - // charges in 6 nodes of two neighboring cells - std::vector q_A, q_B, q_C, q_D, q_E, q_F; - - while (sim.time() < sim.params()->totalRuntime()) { - sim.StepForward(ntt::DiagFlags_None); - - const auto t = sim.time(); - real_t ux1, ux2; - if (t > sim.params()->totalRuntime() * HALF) { - ux1 = r * omega * math::sin(omega * t); - ux2 = r * omega * math::cos(omega * t); - } else { - ux1 = -r * omega * math::sin(omega * t); - ux2 = r * omega * math::cos(omega * t); - } - - Kokkos::parallel_for( - "UpdParticle", - 1, - Lambda(ntt::index_t p) { - const auto x1_p = static_cast(electrons.i1(p)) + - static_cast(electrons.dx1(p)); - const auto x2_p = static_cast(electrons.i2(p)) + - static_cast(electrons.dx2(p)); - ntt::vec_t u { ZERO }; -#if defined(MINKOWSKI_METRIC) - mblock.metric.v3_Hat2Cart({ x1_p, x2_p }, { ux1, ux2, ZERO }, u); -#elif defined(GRPIC_ENGINE) - mblock.metric.v3_Hat2Cov({ x1_p, x2_p }, { ux1, ux2, ZERO }, u); -#else - mblock.metric.v3_Hat2Cart({ x1_p, x2_p, ZERO }, { ux1, ux2, ZERO }, u); -#endif - 
electrons.ux1(p) = u[0]; - electrons.ux2(p) = u[1]; - electrons.ux3(p) = u[2]; - }); - - electrons.SyncHostDevice(); - auto em_h = Kokkos::create_mirror_view(mblock.em); - Kokkos::deep_copy(em_h, mblock.em); - - const auto i0 = 128 + N_GHOSTS, j0 = 128 + N_GHOSTS; - const real_t x0 = 128.0; - const real_t y0 = 128.0; - q_A.push_back( - em_h(i0, j0, ntt::em::ex1) * mblock.metric.sqrt_det_h({ x0 + HALF, y0 }) - - em_h(i0 - 1, j0, ntt::em::ex1) * - mblock.metric.sqrt_det_h({ x0 - HALF, y0 }) + - em_h(i0, j0, ntt::em::ex2) * mblock.metric.sqrt_det_h({ x0, y0 + HALF }) - - em_h(i0, j0 - 1, ntt::em::ex2) * - mblock.metric.sqrt_det_h({ x0, y0 - HALF })); - q_B.push_back(em_h(i0 + 1, j0, ntt::em::ex1) * - mblock.metric.sqrt_det_h({ x0 + 3.0 * HALF, y0 }) - - em_h(i0, j0, ntt::em::ex1) * - mblock.metric.sqrt_det_h({ x0 + HALF, y0 }) + - em_h(i0 + 1, j0, ntt::em::ex2) * - mblock.metric.sqrt_det_h({ x0 + ONE, y0 + HALF }) - - em_h(i0 + 1, j0 - 1, ntt::em::ex2) * - mblock.metric.sqrt_det_h({ x0 + ONE, y0 - HALF })); - q_C.push_back(em_h(i0 + 2, j0, ntt::em::ex1) * - mblock.metric.sqrt_det_h({ x0 + 5.0 * HALF, y0 }) - - em_h(i0 + 1, j0, ntt::em::ex1) * - mblock.metric.sqrt_det_h({ x0 + 3.0 * HALF, y0 }) + - em_h(i0 + 2, j0, ntt::em::ex2) * - mblock.metric.sqrt_det_h({ x0 + TWO, y0 + HALF }) - - em_h(i0 + 2, j0 - 1, ntt::em::ex2) * - mblock.metric.sqrt_det_h({ x0 + TWO, y0 - HALF })); - q_D.push_back(em_h(i0, j0 + 1, ntt::em::ex1) * - mblock.metric.sqrt_det_h({ x0 + HALF, y0 + ONE }) - - em_h(i0 - 1, j0 + 1, ntt::em::ex1) * - mblock.metric.sqrt_det_h({ x0 - HALF, y0 + ONE }) + - em_h(i0, j0 + 1, ntt::em::ex2) * - mblock.metric.sqrt_det_h({ x0, y0 + 3.0 * HALF }) - - em_h(i0, j0, ntt::em::ex2) * - mblock.metric.sqrt_det_h({ x0, y0 + HALF })); - q_E.push_back(em_h(i0 + 1, j0 + 1, ntt::em::ex1) * - mblock.metric.sqrt_det_h({ x0 + 3.0 * HALF, y0 + ONE }) - - em_h(i0, j0 + 1, ntt::em::ex1) * - mblock.metric.sqrt_det_h({ x0 + HALF, y0 + ONE }) + - em_h(i0 + 1, j0 + 1, ntt::em::ex2) * 
- mblock.metric.sqrt_det_h({ x0 + ONE, y0 + 3.0 * HALF }) - - em_h(i0 + 1, j0, ntt::em::ex2) * - mblock.metric.sqrt_det_h({ x0 + ONE, y0 + HALF })); - q_F.push_back(em_h(i0 + 2, j0 + 1, ntt::em::ex1) * - mblock.metric.sqrt_det_h({ x0 + 5.0 * HALF, y0 + ONE }) - - em_h(i0 + 1, j0 + 1, ntt::em::ex1) * - mblock.metric.sqrt_det_h({ x0 + 3.0 * HALF, y0 + ONE }) + - em_h(i0 + 2, j0 + 1, ntt::em::ex2) * - mblock.metric.sqrt_det_h({ x0 + TWO, y0 + 3.0 * HALF }) - - em_h(i0 + 2, j0, ntt::em::ex2) * - mblock.metric.sqrt_det_h({ x0 + TWO, y0 + HALF })); - } - std::vector q_error; - auto q_max = std::numeric_limits::min(); - auto q_max_A = std::max_element(q_A.begin(), q_A.end()); - auto q_max_B = std::max_element(q_B.begin(), q_B.end()); - auto q_max_C = std::max_element(q_C.begin(), q_C.end()); - auto q_max_D = std::max_element(q_D.begin(), q_D.end()); - auto q_max_E = std::max_element(q_E.begin(), q_E.end()); - auto q_max_F = std::max_element(q_F.begin(), q_F.end()); - q_max = std::max(*q_max_A, *q_max_B); - q_max = std::max(q_max, *q_max_C); - q_max = std::max(q_max, *q_max_D); - q_max = std::max(q_max, *q_max_E); - q_max = std::max(q_max, *q_max_F); - for (std::size_t t { 0 }; t < q_A.size(); ++t) { - q_error.push_back( - math::abs(q_A[t] + q_B[t] + q_C[t] + q_D[t] + q_E[t] + q_F[t]) / q_max); - } - auto q_err_max = *std::max_element(q_error.begin(), q_error.end()); - if (q_err_max > 10.0 * std::numeric_limits::epsilon()) { - throw std::runtime_error("max(q_error) = " + std::to_string(q_err_max)); - } - } - - catch (std::exception& err) { - std::cerr << err.what() << std::endl; - ntt::GlobalFinalize(); - return -1; - } - - ntt::GlobalFinalize(); - - return 0; -} \ No newline at end of file diff --git a/legacy/tests/kernels-gr.cpp b/legacy/tests/kernels-gr.cpp deleted file mode 100644 index 6962f7c9f..000000000 --- a/legacy/tests/kernels-gr.cpp +++ /dev/null @@ -1,211 +0,0 @@ -#include - -#include -#include - -#include "wrapper.h" - -#include METRIC_HEADER - -#include 
"kernels/particle_pusher_gr.hpp" - -#include "particle_macros.h" - -template -void put_value(ntt::array_t& arr, T value, int i) { - auto arr_h = Kokkos::create_mirror_view(arr); - arr_h(i) = value; - Kokkos::deep_copy(arr, arr_h); -} - -template -auto get_value(const ntt::array_t& arr, int i) -> T { - auto arr_h = Kokkos::create_mirror_view(arr); - Kokkos::deep_copy(arr_h, arr); - return arr_h(i); -} - -template -auto get_physical_coord(const int p, - const ntt::array_t& i1, - const ntt::array_t& i2, - const ntt::array_t& dx1, - const ntt::array_t& dx2, - const M& metric) -> std::pair { - std::pair rth; - ntt::coord_t xC { ZERO }; - ntt::coord_t rtheta { ZERO }; - xC[0] = i_di_to_Xi(get_value(i1, p), get_value(dx1, p)); - xC[1] = i_di_to_Xi(get_value(i2, p), get_value(dx2, p)); - metric.x_Code2Phys(xC, rtheta); - rth.first = rtheta[0]; - rth.second = rtheta[1]; - return rth; -} - -auto dummy_metric(const unsigned int nx1, const unsigned int nx2) - -> ntt::Metric { - const auto resolution = std::vector({ nx1, nx2 }); - const auto extent = std::vector({ 1.0, 100.0, ZERO, ntt::constant::PI }); - const auto qsph_r0 = (real_t)(0.0); - const auto qsph_h = (real_t)(0.25); - - const auto spin = (real_t)(0.5); - const auto rh = ONE + std::sqrt(ONE - SQR(spin)); - - auto params = new real_t[6]; - params[0] = qsph_r0; - params[1] = qsph_h; - params[4] = spin; - params[5] = rh; - ntt::Metric metric(resolution, extent, params); - delete[] params; - return metric; -} - -auto main(int argc, char* argv[]) -> int { - ntt::GlobalInitialize(argc, argv); - try { - constexpr auto nx1 = 100, nx2 = 100; - auto metric = dummy_metric(nx1, nx2); - { - /* -------------------------------------------------------------------------- */ - /* pusher */ - /* -------------------------------------------------------------------------- */ - ntt::ndfield_t DB { "DB", nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }; - ntt::ndfield_t DB0 { "DB", - nx1 + 2 * N_GHOSTS, - nx2 + 2 * N_GHOSTS }; - ntt::array_t i1 { 
"i1", 10 }; - ntt::array_t i2 { "i2", 10 }; - ntt::array_t i3 { "i3", 10 }; - ntt::array_t i1_prev { "i1_prev", 10 }; - ntt::array_t i2_prev { "i2_prev", 10 }; - ntt::array_t i3_prev { "i3_prev", 10 }; - ntt::array_t dx1 { "dx1", 10 }; - ntt::array_t dx2 { "dx2", 10 }; - ntt::array_t dx3 { "dx3", 10 }; - ntt::array_t dx1_prev { "dx1_prev", 10 }; - ntt::array_t dx2_prev { "dx2_prev", 10 }; - ntt::array_t dx3_prev { "dx3_prev", 10 }; - ntt::array_t ux1 { "ux1", 10 }; - ntt::array_t ux2 { "ux2", 10 }; - ntt::array_t ux3 { "ux3", 10 }; - ntt::array_t phi { "phi", 10 }; - ntt::array_t weight { "weight", 10 }; - ntt::array_t tag { "tag", 10 }; - - int i1_0, i2_0; - real_t dx1_0, dx2_0; - - const real_t r0 = 50.5, th0 = 1.5; - ntt::coord_t xC_0 { ZERO }; - - const real_t ux1_0 = 1.5, ux2_0 = 0.5, ux3_0 = -1.2; - ntt::vec_t uC_0 { ZERO }; - - metric.x_Phys2Code({ r0, th0 }, xC_0); - from_Xi_to_i_di(xC_0[0], i1_0, dx1_0); - from_Xi_to_i_di(xC_0[1], i2_0, dx2_0); - - metric.v3_Hat2Cov(xC_0, { ux1_0, ux2_0, ux3_0 }, uC_0); - - put_value(i1, i1_0, 0); - put_value(i2, i2_0, 0); - put_value(dx1, dx1_0, 0); - put_value(dx2, dx2_0, 0); - put_value(ux1, uC_0[0], 0); - put_value(ux2, uC_0[1], 0); - put_value(ux3, uC_0[2], 0); - put_value(tag, ntt::ParticleTag::alive, 0); - - std::vector> boundaries; - boundaries.push_back( - std::vector(2, ntt::BoundaryCondition::PERIODIC)); - boundaries.push_back( - std::vector(2, ntt::BoundaryCondition::PERIODIC)); - - auto kernel = ntt::Pusher_kernel>( - DB, - DB0, - i1, - i2, - i3, - i1_prev, - i2_prev, - i3_prev, - dx1, - dx2, - dx3, - dx1_prev, - dx2_prev, - dx3_prev, - ux1, - ux2, - ux3, - phi, - tag, - metric, - ONE, - ONE, - nx1, - nx2, - 1, - static_cast(1.0e-5), - 10, - boundaries); - Kokkos::parallel_for( - "ParticlesPush", - Kokkos::RangePolicy(0, 1), - kernel); - auto [ra, tha] = get_physical_coord(0, i1, i2, dx1, dx2, metric); - const real_t pha = get_value(phi, 0); - - if (metric.rg() != ZERO) { - // for KS with M != 0 - if 
(!ntt::AlmostEqual(ra, - static_cast(51.115658), - static_cast(1e-4))) { - throw std::runtime_error("r coordinate is not correct"); - } - if (!ntt::AlmostEqual(tha, - static_cast(1.504318), - static_cast(1e-4))) { - throw std::runtime_error("th coordinate is not correct"); - } - if (!ntt::AlmostEqual(pha, - static_cast(6.272962), - static_cast(1e-4))) { - throw std::runtime_error("phi coordinate is not correct"); - } - } else { - // for KS with M == 0 - if (!ntt::AlmostEqual(ra, - static_cast(51.180923), - static_cast(1e-4))) { - throw std::runtime_error("r coordinate is not correct"); - } - if (!ntt::AlmostEqual(tha, - static_cast(1.504381), - static_cast(1e-4))) { - throw std::runtime_error("th coordinate is not correct"); - } - if (!ntt::AlmostEqual(pha, - static_cast(6.272648), - static_cast(1e-4))) { - throw std::runtime_error("phi coordinate is not correct"); - } - } - } - } - - catch (std::exception& err) { - std::cerr << err.what() << std::endl; - ntt::GlobalFinalize(); - return -1; - } - - ntt::GlobalFinalize(); - - return 0; -} diff --git a/legacy/tests/kernels-sr.cpp b/legacy/tests/kernels-sr.cpp deleted file mode 100644 index 3f64122cd..000000000 --- a/legacy/tests/kernels-sr.cpp +++ /dev/null @@ -1,225 +0,0 @@ -#include - -#include -#include - -#include "wrapper.h" - -#include METRIC_HEADER -#include PGEN_HEADER - -#include "kernels/particle_pusher_sr.hpp" - -#include "particle_macros.h" - -template -void put_value(ntt::array_t& arr, T value, int i) { - auto arr_h = Kokkos::create_mirror_view(arr); - arr_h(i) = value; - Kokkos::deep_copy(arr, arr_h); -} - -template -auto get_value(const ntt::array_t& arr, int i) -> T { - auto arr_h = Kokkos::create_mirror_view(arr); - Kokkos::deep_copy(arr_h, arr); - return arr_h(i); -} - -template -auto get_cartesian_coord(const int p, - const ntt::array_t& i1, - const ntt::array_t& i2, - const ntt::array_t& dx1, - const ntt::array_t& dx2, - const ntt::array_t& phi, - const M& metric) -> std::pair { - std::pair xy; 
-#ifdef MINKOWSKI_METRIC - ntt::coord_t xC { ZERO }; - ntt::coord_t xyz { ZERO }; - xC[0] = i_di_to_Xi(get_value(i1, p), get_value(dx1, p)); - xC[1] = i_di_to_Xi(get_value(i2, p), get_value(dx2, p)); - (void)phi; - metric.x_Code2Cart(xC, xyz); - xy.first = xyz[0]; - xy.second = xyz[1]; -#else - ntt::coord_t xC { ZERO }; - ntt::coord_t xyz { ZERO }; - xC[0] = i_di_to_Xi(get_value(i1, p), get_value(dx1, p)); - xC[1] = i_di_to_Xi(get_value(i2, p), get_value(dx2, p)); - xC[2] = get_value(phi, p); - metric.x_Code2Cart(xC, xyz); - xy.first = xyz[0]; - xy.second = xyz[2]; -#endif - return xy; -} - -auto dummy_metric(const unsigned int nx1, const unsigned int nx2) - -> ntt::Metric { - const auto resolution = std::vector({ nx1, nx2 }); -#ifdef MINKOWSKI_METRIC - const auto extent = std::vector({ 1.0, 100.0, -49.5, 49.5 }); -#else - const auto extent = std::vector({ 1.0, 100.0, ZERO, ntt::constant::PI }); -#endif - // optional for Qspherical - const auto qsph_r0 = (real_t)(0.0); - const auto qsph_h = (real_t)(0.25); - - auto params = new real_t[6]; - params[0] = qsph_r0; - params[1] = qsph_h; - ntt::Metric metric(resolution, extent, params); - delete[] params; - return metric; -} - -auto dummy_pgen() -> ntt::ProblemGenerator { - return ntt::ProblemGenerator(); -} - -auto main(int argc, char* argv[]) -> int { - ntt::GlobalInitialize(argc, argv); - try { - constexpr auto nx1 = 10, nx2 = 10; - auto metric = dummy_metric(nx1, nx2); - auto pgen = dummy_pgen(); - { - /* -------------------------------------------------------------------------- */ - /* pusher */ - /* -------------------------------------------------------------------------- */ - ntt::ndfield_t EB { "EB", nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }; - ntt::array_t i1 { "i1", 10 }; - ntt::array_t i2 { "i2", 10 }; - ntt::array_t i3 { "i3", 10 }; - ntt::array_t i1_prev { "i1_prev", 10 }; - ntt::array_t i2_prev { "i2_prev", 10 }; - ntt::array_t i3_prev { "i3_prev", 10 }; - ntt::array_t dx1 { "dx1", 10 }; - ntt::array_t dx2 
{ "dx2", 10 }; - ntt::array_t dx3 { "dx3", 10 }; - ntt::array_t dx1_prev { "dx1_prev", 10 }; - ntt::array_t dx2_prev { "dx2_prev", 10 }; - ntt::array_t dx3_prev { "dx3_prev", 10 }; - ntt::array_t ux1 { "ux1", 10 }; - ntt::array_t ux2 { "ux2", 10 }; - ntt::array_t ux3 { "ux3", 10 }; - ntt::array_t phi { "phi", 10 }; - ntt::array_t weight { "weight", 10 }; - ntt::array_t tag { "tag", 10 }; - - int i1_0, i2_0; - real_t dx1_0, dx2_0; - const real_t ux1_0 = 1.5; - - const real_t x0 = 50.5, y0 = 1.0; - -#ifdef MINKOWSKI_METRIC - const real_t ux2_0 = -1.2, ux3_0 = 0.5; - ntt::coord_t xyz0 { x0, y0 }; - ntt::coord_t xC_0 { ZERO }; -#else - const real_t ux3_0 = -1.2, ux2_0 = 0.5; - ntt::coord_t xyz0 { x0, ZERO, y0 }; - ntt::coord_t xC_0 { ZERO }; -#endif - - const real_t gamma = math::sqrt(ONE + SQR(ux1_0) + SQR(ux2_0) + SQR(ux3_0)); - metric.x_Cart2Code(xyz0, xC_0); - from_Xi_to_i_di(xC_0[0], i1_0, dx1_0); - from_Xi_to_i_di(xC_0[1], i2_0, dx2_0); - - put_value(i1, i1_0, 0); - put_value(i2, i2_0, 0); - put_value(dx1, dx1_0, 0); - put_value(dx2, dx2_0, 0); - put_value(ux1, ux1_0, 0); - put_value(ux2, ux2_0, 0); - put_value(ux3, ux3_0, 0); - put_value(tag, ntt::ParticleTag::alive, 0); - - std::vector> boundaries; - boundaries.push_back( - std::vector(2, ntt::BoundaryCondition::PERIODIC)); - boundaries.push_back( - std::vector(2, ntt::BoundaryCondition::PERIODIC)); - - auto kernel = ntt::Pusher_kernel, - ntt::ProblemGenerator, - ntt::Boris_t, - false>(EB, - i1, - i2, - i3, - i1_prev, - i2_prev, - i3_prev, - dx1, - dx2, - dx3, - dx1_prev, - dx2_prev, - dx3_prev, - ux1, - ux2, - ux3, - phi, - tag, - metric, - pgen, - ZERO, - ONE, - ONE, - nx1, - nx2, - 1, - boundaries, - ZERO, - ZERO, - ZERO); - Kokkos::parallel_for( - "ParticlesPush", - Kokkos::RangePolicy(0, 1), - kernel); - auto [xa, ya] = get_cartesian_coord(0, i1, i2, dx1, dx2, phi, metric); - - if (!ntt::AlmostEqual(xa, - static_cast(x0 + ux1_0 / gamma), - static_cast(1e-4))) { - throw std::runtime_error("x coordinate is 
not correct"); - } - - if (!ntt::AlmostEqual(ya, - static_cast(y0 - 1.2 / gamma), - static_cast(1e-4))) { - throw std::runtime_error("y/z coordinate is not correct"); - } - - if (!ntt::AlmostEqual(get_value(ux1, 0), ux1_0)) { - throw std::runtime_error("ux1 is not correct"); - } - - if (!ntt::AlmostEqual(get_value(ux2, 0), ux2_0)) { - throw std::runtime_error("ux2 is not correct"); - } - - if (!ntt::AlmostEqual(get_value(ux3, 0), ux3_0)) { - throw std::runtime_error("ux3 is not correct"); - } - } - } - - catch (std::exception& err) { - std::cerr << err.what() << std::endl; - ntt::GlobalFinalize(); - return -1; - } - - ntt::GlobalFinalize(); - - return 0; -} diff --git a/legacy/tests/pusher-sr-mink.cpp b/legacy/tests/pusher-sr-mink.cpp deleted file mode 100644 index 97f37b63c..000000000 --- a/legacy/tests/pusher-sr-mink.cpp +++ /dev/null @@ -1,194 +0,0 @@ -#include "wrapper.h" - -#include "particle_macros.h" -#include "pic.h" -#include "sim_params.h" - -#include "io/input.h" -#include "meshblock/meshblock.h" -#include "utilities/qmath.h" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -auto main(int argc, char* argv[]) -> int { - ntt::GlobalInitialize(argc, argv); - try { - using namespace toml::literals::toml_literals; - const auto inputdata = R"( - [domain] - resolution = [256, 128, 128] - extent = [-16.0, 16.0, -8.0, 8.0, -8.0, 8.0] - boundaries = [["PERIODIC"], ["PERIODIC"], ["PERIODIC"]] - - [units] - ppc0 = 1.0 - larmor0 = 2.0 - skindepth0 = 1.0 - - [particles] - n_species = 1 - - [species_1] - label = "e+" - mass = 1.0 - charge = 1.0 - maxnpart = 10.0 - - [output] - format = "disabled" - )"_toml; - - auto sim = ntt::PIC(inputdata); - - real_t bx1 = 0.256, bx2 = 0.953, bx3 = -0.234; - const real_t bmag = 2.0; - const real_t u_part = 2.0; - const auto nperiods = 5; - - const auto bb = math::sqrt(SQR(bx1) + SQR(bx2) + SQR(bx3)); - bx1 /= bb; - bx2 /= bb; - bx3 /= bb; - const real_t beta_part = u_part / 
math::sqrt(ONE + SQR(u_part)); - const real_t ax1 = bx2, ax2 = -bx3, ax3 = bx1; - real_t perp_x1 = ax2 * bx3 - ax3 * bx2; - real_t perp_x2 = ax3 * bx1 - ax1 * bx3; - real_t perp_x3 = ax1 * bx2 - ax2 * bx1; - const real_t perp = math::sqrt(SQR(perp_x1) + SQR(perp_x2) + SQR(perp_x3)); - perp_x1 /= perp; - perp_x2 /= perp; - perp_x3 /= perp; - const auto ux1 = u_part * perp_x1; - const auto ux2 = u_part * perp_x2; - const auto ux3 = u_part * perp_x3; - - auto& mblock = sim.meshblock; - { - Kokkos::parallel_for( - "InitFields", - mblock.rangeActiveCells(), - Lambda(ntt::index_t i1, ntt::index_t i2, ntt::index_t i3) { - ntt::coord_t xi { ZERO }; - ntt::vec_t b_cntrv { ZERO }; - mblock.metric.v3_PhysCntrv2Cntrv(xi, - { bmag * bx1, bmag * bx2, bmag * bx3 }, - b_cntrv); - mblock.em(i1, i2, i3, ntt::em::bx1) = b_cntrv[0]; - mblock.em(i1, i2, i3, ntt::em::bx2) = b_cntrv[1]; - mblock.em(i1, i2, i3, ntt::em::bx3) = b_cntrv[2]; - }); - sim.Communicate(ntt::Comm_B); - } - - { - using namespace ntt; - mblock.particles[0].setNpart(1); - auto positrons = mblock.particles[0]; - Kokkos::parallel_for( - "InitParticles", - positrons.rangeActiveParticles(), - Lambda(ntt::index_t p) { - init_prtl_3d(mblock, positrons, p, 0.0, 0.0, 0.0, ux1, ux2, ux3, 1.0); - }); - } - - { - const auto dt = mblock.timestep(); - const auto larmor = sim.params()->larmor0() * u_part / bmag; - const auto period = ntt::constant::TWO_PI * larmor / beta_part; - auto positrons = mblock.particles[0]; - auto maxdist = ZERO, maxupar = ZERO; - const auto nmax = static_cast(nperiods * period / dt); - for (auto n { 0 }; n < nmax + 1; ++n) { - if (n < nmax) { - sim.ParticlesPush(); - } else { - const auto fraction = (nperiods * period / dt - nmax); - sim.ParticlesPush(fraction); - } - positrons.SyncHostDevice(); - - { - ntt::coord_t xprtl { ZERO }; - ntt::coord_t xi { static_cast(positrons.i1_h(0)) + - static_cast(positrons.dx1_h(0)), - static_cast(positrons.i2_h(0)) + - static_cast(positrons.dx2_h(0)), - 
static_cast(positrons.i3_h(0)) + - static_cast(positrons.dx3_h(0)) }; - mblock.metric.x_Code2Cart(xi, xprtl); - const auto dist = math::sqrt( - SQR(xprtl[0]) + SQR(xprtl[1]) + SQR(xprtl[2])); - if (dist > maxdist) { - maxdist = dist; - } - if (n == nmax) { - !(ntt::AlmostZero(SQR(dist), (real_t)1e-4)) - ? throw std::logic_error(fmt::format( - "particle not in init position: %.6e != 0.0, L2 = %.6e", - dist, - SQR(dist))) - : (void)0; - !(ntt::AlmostEqual(maxdist, TWO * larmor, (real_t)1e-3)) - ? throw std::logic_error( - fmt::format("maxdist is incorrect: %.6f != %.6f", - maxdist, - TWO * larmor)) - : (void)0; - !(ntt::AlmostZero(maxupar)) - ? throw std::logic_error( - fmt::format("maxupar is nonzero: %f", maxupar)) - : (void)0; - const auto L2_u = SQR(positrons.ux1_h(0) - ux1) + - SQR(positrons.ux2_h(0) - ux2) + - SQR(positrons.ux3_h(0) - ux3); - !(ntt::AlmostZero(L2_u, (real_t)1e-4)) - ? throw std::logic_error( - fmt::format("u_init != u_final: L2 = %.2e", L2_u)) - : (void)0; - } - } - - { - const auto u_mag = math::sqrt(SQR(positrons.ux1_h(0)) + - SQR(positrons.ux2_h(0)) + - SQR(positrons.ux3_h(0))); - !(ntt::AlmostEqual(u_mag, u_part)) - ? throw std::logic_error( - fmt::format("u_mag is incorrect after %d pushes: %.6f != %.6f", - n, - u_mag, - u_part)) - : (void)0; - const auto upar = (positrons.ux1_h(0) * bx1 + positrons.ux2_h(0) * bx2 + - positrons.ux3_h(0) * bx3) / - (u_part * bmag); - if (math::abs(upar) > maxupar) { - maxupar = math::abs(upar); - } - !(ntt::AlmostZero(upar)) - ? 
throw std::logic_error( - fmt::format("u_|| is nonzero after %d pushes: %.2e", n, upar)) - : (void)0; - } - } - } - } catch (std::exception& err) { - std::cerr << err.what() << std::endl; - ntt::GlobalFinalize(); - return -1; - } - ntt::GlobalFinalize(); - - return 0; -} \ No newline at end of file diff --git a/legacy/tests/utils-comm.cpp b/legacy/tests/utils-comm.cpp deleted file mode 100644 index 047c71b3a..000000000 --- a/legacy/tests/utils-comm.cpp +++ /dev/null @@ -1,212 +0,0 @@ -#ifdef MPI_ENABLED - #include "wrapper.h" - - #if defined(SANDBOX_ENGINE) - - #include "sandbox.h" -template -using SimEngine = ntt::SANDBOX; - - #elif defined(PIC_ENGINE) - - #include "pic.h" -template -using SimEngine = ntt::PIC; - - #elif defined(GRPIC_ENGINE) - - #include "grpic.h" -template -using SimEngine = ntt::GRPIC; - - #endif - - #include "sim_params.h" - - #include "communications/decomposition.h" - #include "communications/metadomain.h" - #include "meshblock/meshblock.h" - #include "utilities/qmath.h" - - #include "utilities/injector.hpp" - - #include - #include - #include - #include - - #include - #include - #include - #include - #include - -auto main(int argc, char* argv[]) -> int { - ntt::GlobalInitialize(argc, argv); - try { - toml::table simulation, domain, units, output, algorithm; - toml::table particles, species_1, species_2; - const auto simname = "Writer-" + std::string(SIMULATION_METRIC); - simulation["title"] = simname; - domain["resolution"] = toml::array { 64, 64 }; - - particles["n_species"] = 2; - species_1["mass"] = 0.0; - species_1["charge"] = 0.0; - species_1["maxnpart"] = 1e2; - species_2["mass"] = 0.0; - species_2["charge"] = 0.0; - species_2["maxnpart"] = 1e2; - - #ifdef MINKOWSKI_METRIC - domain["extent"] = toml::array { -1.0, 1.0, -1.0, 1.0 }; - domain["boundaries"] = toml::array { toml::array { "PERIODIC" }, - toml::array { "PERIODIC" } }; - #else - domain["extent"] = toml::array { 0.8, 20.0 }; - domain["boundaries"] = toml::array { - toml::array 
{ "OPEN", "ABSORB" }, - toml::array { "AXIS" } - }; - domain["qsph_r0"] = 0.0; - domain["qsph_h"] = 0.4; - domain["spin"] = 0.5; - #endif - - units["ppc0"] = 1.0; - units["larmor0"] = 0.1; - units["skindepth0"] = 1.0; - - // output["fields"] = toml::array { "E", "B" }; - output["particles"] = toml::array { "X", "U" }; - output["prtl_stride"] = 1; - output["format"] = "HDF5"; - output["as_is"] = true; - output["ghosts"] = true; - - auto inputdata = toml::table { - {"simulation", simulation}, - { "domain", domain}, - { "units", units}, - { "output", output}, - { "particles", particles}, - { "species_1", species_1}, - { "species_2", species_2} - }; - - // write - { - SimEngine sim(inputdata); - auto& mblock { sim.meshblock }; - // allocate fields - mblock.em = ntt::ndfield_t { "em", - mblock.Ni1() + 2 * N_GHOSTS, - mblock.Ni2() + 2 * N_GHOSTS }; - mblock.bckp = ntt::ndfield_t { "bckp", - mblock.Ni1() + 2 * N_GHOSTS, - mblock.Ni2() + 2 * N_GHOSTS }; - - // allocate particles - for (auto& specie : mblock.particles) { - specie.i1 = ntt::array_t { specie.label() + "_i1", - specie.maxnpart() }; - specie.i2 = ntt::array_t { specie.label() + "_i2", - specie.maxnpart() }; - specie.dx1 = ntt::array_t { specie.label() + "_dx1", - specie.maxnpart() }; - specie.dx2 = ntt::array_t { specie.label() + "_dx2", - specie.maxnpart() }; - specie.ux1 = ntt::array_t { specie.label() + "_ux1", - specie.maxnpart() }; - specie.ux2 = ntt::array_t { specie.label() + "_ux2", - specie.maxnpart() }; - specie.ux3 = ntt::array_t { specie.label() + "_ux3", - specie.maxnpart() }; - specie.weight = ntt::array_t { specie.label() + "_w", - specie.maxnpart() }; - #ifndef MINKOWSKI_METRIC - specie.phi = ntt::array_t { specie.label() + "_phi", - specie.maxnpart() }; - #endif - specie.tag = ntt::array_t { specie.label() + "_tag", - specie.maxnpart() }; - } - - { - // fill dummy fields - auto tag = (real_t)sim.metadomain()->mpiRank(); - Kokkos::deep_copy(mblock.em, (real_t)(-100.0)); - Kokkos::parallel_for( - 
"FillWithDummies-Flds", - mblock.rangeActiveCells(), - Lambda(ntt::index_t i1, ntt::index_t i2) { - mblock.em(i1, i2, ntt::em::ex1) = tag; - mblock.em(i1, i2, ntt::em::ex2) = tag + 0.1; - mblock.em(i1, i2, ntt::em::ex3) = tag + 0.2; - mblock.em(i1, i2, ntt::em::bx1) = tag + 0.3; - mblock.em(i1, i2, ntt::em::bx2) = tag + 0.4; - mblock.em(i1, i2, ntt::em::bx3) = tag + 0.5; - }); - } - - { - if (sim.metadomain()->mpiRank() == 0) { - auto& specie1 = mblock.particles[0]; - auto& specie2 = mblock.particles[1]; - specie1.setNpart(2); - specie2.setNpart(2); - Kokkos::parallel_for( - "FillWithDummies-Prtls", - specie1.rangeActiveParticles(), - Lambda(ntt::index_t p) { - specie1.tag(p) = ntt::ParticleTag::alive; - specie1.i1(p) = 1 + p; - specie1.i2(p) = 1 + 5 * p; - specie1.dx1(p) = 0.5; - specie1.dx2(p) = 0.5; - specie1.ux1(p) = 1.0 + (real_t)(p * 0.5); - specie1.ux2(p) = 1.0 + (real_t)(p * 4.5); - specie1.ux3(p) = 1.0 + (real_t)(p * 0.5); - - specie2.tag(p) = ntt::ParticleTag::alive; - specie2.i1(p) = 1 + 3 * p; - specie2.i2(p) = 1 + 3 * p; - specie2.dx1(p) = 0.5; - specie2.dx2(p) = 0.5; - specie2.ux1(p) = 0.5 + (real_t)(p * 0.5); - specie2.ux2(p) = 0.2 + (real_t)(p * 0.5); - specie2.ux3(p) = -0.1 + (real_t)(p * 0.5); - }); - } - } - { - // advance the fake simulation - const auto nsteps = 100; - for (auto i { 0 }; i < nsteps; ++i) { - sim.Communicate(ntt::Comm_E | ntt::Comm_B); - sim.ParticlesPush(); - sim.ParticlesBoundaryConditions(); - sim.Communicate(ntt::Comm_Prtl); - sim.writer.WriteAll(*sim.params(), - *sim.metadomain(), - mblock, - (real_t)i, - (std::size_t)i); - printf("step: %d, rank: %d, npart1: %ld, npart2: %ld\n", - i, - sim.metadomain()->mpiRank(), - mblock.particles[0].npart(), - mblock.particles[1].npart()); - } - } - } - } catch (std::exception& err) { - std::cerr << err.what() << std::endl; - ntt::GlobalFinalize(); - return -1; - } - ntt::GlobalFinalize(); - - return 0; -} -#endif \ No newline at end of file diff --git 
a/legacy/tests/utils-metadomain.cpp b/legacy/tests/utils-metadomain.cpp deleted file mode 100644 index b1cf80c70..000000000 --- a/legacy/tests/utils-metadomain.cpp +++ /dev/null @@ -1,159 +0,0 @@ -#include "wrapper.h" - -#include "communications/decomposition.h" -#include "communications/metadomain.h" -#include "utilities/qmath.h" - -#include -#include -#include -#include -#include - -auto main(int argc, char* argv[]) -> int { - ntt::GlobalInitialize(argc, argv); - try { - const auto resolution = std::vector({ 5000, 1800 }); -#ifdef MINKOWSKI_METRIC - const auto extent = std::vector({ 1.0, 100.0, -20.0, 15.64 }); - const auto boundaries = std::vector> { - { ntt::BoundaryCondition::PERIODIC }, - { ntt::BoundaryCondition::OPEN } - }; -#else - const auto extent = std::vector({ 1.0, 100.0, ZERO, ntt::constant::PI }); - const auto boundaries = std::vector> { - { ntt::BoundaryCondition::CUSTOM, ntt::BoundaryCondition::ABSORB }, - { ntt::BoundaryCondition::AXIS } - }; -#endif - // optional for GR - const auto spin = (real_t)(0.9); - const auto rh = ONE + std::sqrt(ONE - SQR(spin)); - // optional for Qspherical - const auto qsph_r0 = (real_t)(0.0); - const auto qsph_h = (real_t)(0.25); - - auto params = new real_t[6]; - params[0] = qsph_r0; - params[1] = qsph_h; - params[4] = spin; - params[5] = rh; - - const auto decomposition = std::vector { 7, 3 }; - - auto metadomain = ntt::Metadomain(resolution, - extent, - decomposition, - params, - boundaries, - true); - - auto first_domain = *metadomain.domainByOffset({ 0, 0 }); - auto last_domain = *metadomain.domainByOffset( - { decomposition[0] - 1, decomposition[1] - 1 }); - for (auto d { 0 }; d < 2; ++d) { - if (first_domain.offsetNdomains()[d] != 0) { - throw std::logic_error("first_domain.offsetNdomains()[d] != 0"); - } - if (first_domain.offsetNcells()[d] != 0) { - throw std::logic_error("first_domain.offsetNcells()[d] != 0"); - } - if (last_domain.offsetNdomains()[d] != decomposition[d] - 1) { - throw std::logic_error( - 
"last_domain.offsetNdomains()[d] != decomposition[d] - 1"); - } - if (last_domain.offsetNcells()[d] + last_domain.ncells()[d] != resolution[d]) { - throw std::logic_error("last_domain.offsetNcells()[d] + " - "last_domain.ncells()[d] != resolution[d]"); - } - } - - if (!ntt::AlmostEqual(first_domain.extent()[0], extent[0])) { - throw std::logic_error("first_domain.extent()[0] != extent[0]"); - } - if (!ntt::AlmostEqual(first_domain.extent()[2], extent[2])) { - throw std::logic_error("first_domain.extent()[2] != extent[2]"); - } - if (!ntt::AlmostEqual(last_domain.extent()[1], extent[1])) { - throw std::logic_error("last_domain.extent()[1] != extent[1]"); - } - if (!ntt::AlmostEqual(last_domain.extent()[3], extent[3])) { - throw std::logic_error("last_domain.extent()[3] != extent[3]"); - } - if (!(first_domain.boundaries()[0][0] == boundaries[0][0])) { - throw std::logic_error("wrong first_domain.boundaries()[0][0]"); - } - if (!(first_domain.boundaries()[0][1] == ntt::BoundaryCondition::COMM)) { - throw std::logic_error("wrong first_domain.boundaries()[0][1]"); - } - if (!(first_domain.boundaries()[1][0] == - (boundaries[1].size() > 1 ? boundaries[1][1] : boundaries[1][0]))) { - throw std::logic_error("wrong first_domain.boundaries()[1][0]"); - } - if (!(first_domain.boundaries()[1][1] == ntt::BoundaryCondition::COMM)) { - throw std::logic_error("wrong first_domain.boundaries()[1][1]"); - } - if (!(last_domain.boundaries()[0][0] == ntt::BoundaryCondition::COMM)) { - throw std::logic_error("wrong last_domain.boundaries()[0][0]"); - } - if (!(last_domain.boundaries()[0][1] == - (boundaries[0].size() > 1 ? boundaries[0][1] : boundaries[0][0]))) { - throw std::logic_error("wrong last_domain.boundaries()[0][1]"); - } - if (!(last_domain.boundaries()[1][0] == ntt::BoundaryCondition::COMM)) { - throw std::logic_error("wrong last_domain.boundaries()[1][0]"); - } - if (!(last_domain.boundaries()[1][1] == - (boundaries[1].size() > 1 ? 
boundaries[1][1] : boundaries[1][0]))) { - throw std::logic_error("wrong last_domain.boundaries()[1][1]"); - } - - auto first_domain1 = first_domain.neighbors({ 0, +1 }) - ->neighbors({ 0, +1 }) - ->neighbors({ 0, -1 }) - ->neighbors({ 0, -1 }); - - auto first_domain2 = last_domain.neighbors({ -1, -1 }) - ->neighbors({ -1, -1 }) - ->neighbors({ -1, 0 }) - ->neighbors({ -1, 0 }) - ->neighbors({ -1, 0 }) - ->neighbors({ -1, 0 }); - - if (first_domain1 != metadomain.domainByOffset({ 0, 0 })) { - throw std::logic_error("Wrong neighbor assignment"); - } - if (first_domain2 != first_domain1) { - throw std::logic_error("Wrong neighbor assignment"); - } - - if (first_domain.neighbors({ 0, -1 }) != nullptr) { - throw std::logic_error("Wrong neighbor assignment: boundaries"); - } - if (last_domain.neighbors({ 0, 1 }) != nullptr) { - throw std::logic_error("Wrong neighbor assignment: boundaries"); - } - - for (auto& domain : metadomain.domains) { - for (auto& direction : ntt::Directions::all) { - if ((domain.neighbors(direction) == nullptr) && - (domain.boundaryIn(direction) == ntt::BoundaryCondition::COMM)) { - throw std::logic_error("Neighbor == null && BC == COMM."); - } - if ((domain.neighbors(direction) != nullptr) && - (domain.boundaryIn(direction) != ntt::BoundaryCondition::COMM)) { - throw std::logic_error("Neighbor != null && BC != COMM."); - } - } - } - - } catch (std::exception& err) { - std::cerr << err.what() << std::endl; - ntt::GlobalFinalize(); - return -1; - } - ntt::GlobalFinalize(); - - return 0; -} \ No newline at end of file diff --git a/legacy/tests/utils-writer.cpp b/legacy/tests/utils-writer.cpp deleted file mode 100644 index a894e0d91..000000000 --- a/legacy/tests/utils-writer.cpp +++ /dev/null @@ -1,163 +0,0 @@ -#include "wrapper.h" - -#include "sandbox.h" -#include "sim_params.h" - -#include "communications/decomposition.h" -#include "communications/metadomain.h" -#include "meshblock/meshblock.h" -#include "utilities/qmath.h" - -#include 
"utilities/injector.hpp" - -#include -#include -#include - -#ifdef MPI_ENABLED - #include -#endif - -#include -#include -#include -#include -#include - -auto main(int argc, char* argv[]) -> int { - ntt::GlobalInitialize(argc, argv); - try { - toml::table simulation, domain, units, output; - toml::table particles, species_1, species_2, species_3; - const auto simname = "Writer-" + std::string(SIMULATION_METRIC); - simulation["title"] = simname; - domain["resolution"] = toml::array { 250, 400 }; - - particles["n_species"] = 3; - species_1["mass"] = 1.0; - species_1["charge"] = -1.0; - species_1["maxnpart"] = 1e6; - species_2["mass"] = 1.0; - species_2["charge"] = 1.0; - species_2["maxnpart"] = 1e6; - species_3["mass"] = 0.0; - species_3["charge"] = 0.0; - species_3["maxnpart"] = 1e6; - -#ifdef MINKOWSKI_METRIC - domain["extent"] = toml::array { -50.0, 50.0, -20.0, 140.0 }; - domain["boundaries"] = toml::array { toml::array { "PERIODIC" }, - toml::array { "PERIODIC" } }; -#else - domain["extent"] = toml::array { 1.0, 150.0 }; - domain["boundaries"] = toml::array { - toml::array { "OPEN", "ABSORB" }, - toml::array { "AXIS" } - }; - domain["qsph_r0"] = 0.0; - domain["qsph_h"] = 0.4; - domain["spin"] = 0.9; -#endif - - units["ppc0"] = 1.0; - units["larmor0"] = 1.0; - units["skindepth0"] = 1.0; - - output["fields"] = toml::array { "E", "B" }; - output["particles"] = toml::array { "X", "U" }; - output["prtl_stride"] = 1; - output["format"] = "HDF5"; - output["as_is"] = true; - output["ghosts"] = true; - - auto inputdata = toml::table { - {"simulation", simulation}, - { "domain", domain}, - { "units", units}, - { "output", output}, - { "particles", particles}, - { "species_1", species_1}, - { "species_2", species_2}, - { "species_3", species_3} - }; - - // write - { - ntt::SANDBOX sim(inputdata); - auto& mblock = sim.meshblock; - // allocate fields - mblock.em = ntt::ndfield_t { "em", - mblock.Ni1() + 2 * N_GHOSTS, - mblock.Ni2() + 2 * N_GHOSTS }; - mblock.bckp = 
ntt::ndfield_t { "bckp", - mblock.Ni1() + 2 * N_GHOSTS, - mblock.Ni2() + 2 * N_GHOSTS }; - - // allocate particles - for (auto& specie : mblock.particles) { - specie.i1 = ntt::array_t { specie.label() + "_i1", - specie.maxnpart() }; - specie.i2 = ntt::array_t { specie.label() + "_i2", - specie.maxnpart() }; - specie.dx1 = ntt::array_t { specie.label() + "_dx1", - specie.maxnpart() }; - specie.dx2 = ntt::array_t { specie.label() + "_dx2", - specie.maxnpart() }; - specie.ux1 = ntt::array_t { specie.label() + "_ux1", - specie.maxnpart() }; - specie.ux2 = ntt::array_t { specie.label() + "_ux2", - specie.maxnpart() }; - specie.ux3 = ntt::array_t { specie.label() + "_ux3", - specie.maxnpart() }; - specie.weight = ntt::array_t { specie.label() + "_w", - specie.maxnpart() }; -#ifndef MINKOWSKI_METRIC - specie.phi = ntt::array_t { specie.label() + "_phi", - specie.maxnpart() }; -#endif - specie.tag = ntt::array_t { specie.label() + "_tag", - specie.maxnpart() }; - } - - { - // fill dummy fields -#ifdef MPI_ENABLED - auto tag = (real_t)sim.metadomain()->mpiRank(); -#else - auto tag = ZERO; -#endif - Kokkos::deep_copy(mblock.em, (real_t)(-100.0)); - Kokkos::parallel_for( - "FillWithDummies", - mblock.rangeActiveCells(), - Lambda(ntt::index_t i1, ntt::index_t i2) { - mblock.em(i1, i2, ntt::em::ex1) = tag; - mblock.em(i1, i2, ntt::em::ex2) = tag + 0.1; - mblock.em(i1, i2, ntt::em::ex3) = tag + 0.2; - mblock.em(i1, i2, ntt::em::bx1) = tag + 0.3; - mblock.em(i1, i2, ntt::em::bx2) = tag + 0.4; - mblock.em(i1, i2, ntt::em::bx3) = tag + 0.5; - }); - } - { - ntt::InjectInVolume(*sim.params(), - mblock, - { 1, 2 }, - 2.0); - } - sim.Communicate(ntt::Comm_E | ntt::Comm_B); - sim.writer.WriteAll(*sim.params(), *sim.metadomain(), mblock, ZERO, 0); - for (auto& specie : mblock.particles) { - specie.setNpart((std::size_t)(specie.npart() / 2)); - } - sim.writer.WriteAll(*sim.params(), *sim.metadomain(), mblock, ZERO, 0); - } - } catch (std::exception& err) { - std::cerr << err.what() << 
std::endl; - ntt::GlobalFinalize(); - return -1; - } - ntt::GlobalFinalize(); - - return 0; -} \ No newline at end of file diff --git a/src/archetypes/energy_dist.h b/src/archetypes/energy_dist.h index 578a29684..56a797751 100644 --- a/src/archetypes/energy_dist.h +++ b/src/archetypes/energy_dist.h @@ -103,7 +103,7 @@ namespace arch { }; Inline void JuttnerSinge(vec_t& v, - const real_t& temp, + real_t temp, const random_number_pool_t& pool) { auto rand_gen = pool.get_state(); real_t randX1, randX2; @@ -161,13 +161,12 @@ namespace arch { } template - Inline void SampleFromMaxwellian( - vec_t& v, - const random_number_pool_t& pool, - const real_t& temperature, - const real_t& boost_velocity = static_cast(0), - const in& boost_direction = in::x1, - bool flip_velocity = false) { + Inline void SampleFromMaxwellian(vec_t& v, + const random_number_pool_t& pool, + real_t temperature, + real_t boost_velocity = static_cast(0), + in boost_direction = in::x1, + bool flip_velocity = false) { if (cmp::AlmostZero(temperature)) { v[0] = ZERO; v[1] = ZERO; diff --git a/src/global/tests/enums.cpp b/src/global/tests/enums.cpp index f653f4727..c70572be4 100644 --- a/src/global/tests/enums.cpp +++ b/src/global/tests/enums.cpp @@ -65,7 +65,7 @@ auto main() -> int { "atmosphere", "custom", "horizon", "axis", "conductor", "sync" }; enum_str_t all_particle_pushers = { "boris", "vay", "photon", "none" }; - enum_str_t all_coolings = { "synchrotron", "none" }; + enum_str_t all_coolings = { "synchrotron", "compton", "none" }; enum_str_t all_out_flds = { "e", "dive", "d", "divd", "b", "h", "j", "a", "t", "rho", diff --git a/src/global/utils/sorting.h b/src/global/utils/sorting.h index f5c02085b..7c4a1fa25 100644 --- a/src/global/utils/sorting.h +++ b/src/global/utils/sorting.h @@ -22,7 +22,7 @@ namespace sort { BinBool() = default; template - Inline auto bin(ViewType& keys, const int& i) const -> int { + Inline auto bin(ViewType& keys, int i) const -> int { return keys(i) ? 
1 : 0; } @@ -38,10 +38,10 @@ namespace sort { template struct BinTag { - BinTag(const int& max_bins) : m_max_bins { max_bins } {} + BinTag(int max_bins) : m_max_bins { max_bins } {} template - Inline auto bin(ViewType& keys, const int& i) const -> int { + Inline auto bin(ViewType& keys, int i) const -> int { return (keys(i) == 0) ? 1 : ((keys(i) == 1) ? 0 : keys(i)); } diff --git a/src/kernels/fields_bcs.hpp b/src/kernels/fields_bcs.hpp index 5a1074970..575bda879 100644 --- a/src/kernels/fields_bcs.hpp +++ b/src/kernels/fields_bcs.hpp @@ -92,7 +92,7 @@ namespace kernel::bc { } } - Inline auto shape(const real_t& dx) const -> real_t { + Inline auto shape(real_t dx) const -> real_t { return math::tanh(dx * FOUR / dx_abs); } diff --git a/src/kernels/injectors.hpp b/src/kernels/injectors.hpp index a598e230a..d68acf0ed 100644 --- a/src/kernels/injectors.hpp +++ b/src/kernels/injectors.hpp @@ -26,7 +26,7 @@ namespace kernel { using namespace ntt; template - Inline void InjectParticle(const npart_t& p, + Inline void InjectParticle(npart_t p, const array_t& i1_arr, const array_t& i2_arr, const array_t& i3_arr, @@ -43,10 +43,10 @@ namespace kernel { const tuple_t& xi_Cd, const tuple_t& dxi_Cd, const vec_t& v_Cd, - const real_t& weight = ONE, - const real_t& phi = ZERO, - const npart_t& domain_idx = 0u, - const npart_t& species_cntr = 0u) { + real_t weight = ONE, + real_t phi = ZERO, + npart_t domain_idx = 0u, + npart_t species_cntr = 0u) { if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { i1_arr(p) = xi_Cd[0]; dx1_arr(p) = dxi_Cd[0]; @@ -615,11 +615,11 @@ namespace kernel { return idx_h(); } - Inline void inject1(const index_t& index, + Inline void inject1(const index_t index, const tuple_t& xi_Cd, const tuple_t& dxi_Cd, const vec_t& v_Cd, - const real_t& weight) const { + const real_t weight) const { // clang-format off if (not use_tracking_1) { InjectParticle(index + offset1, @@ -640,11 +640,11 @@ namespace kernel { // clang-format on } - Inline void 
inject2(const index_t& index, + Inline void inject2(const index_t index, const tuple_t& xi_Cd, const tuple_t& dxi_Cd, const vec_t& v_Cd, - const real_t& weight) const { + const real_t weight) const { // clang-format off if (not use_tracking_2) { InjectParticle(index + offset1, diff --git a/src/kernels/particle_pusher_gr.hpp b/src/kernels/particle_pusher_gr.hpp index c1dfdf949..dae90c626 100644 --- a/src/kernels/particle_pusher_gr.hpp +++ b/src/kernels/particle_pusher_gr.hpp @@ -29,7 +29,9 @@ /* Local macros */ /* -------------------------------------------------------------------------- */ #define from_Xi_to_i(XI, I) \ - { I = static_cast((XI + 1)) - 1; } + { \ + I = static_cast((XI + 1)) - 1; \ + } #define from_Xi_to_i_di(XI, I, DI) \ { \ @@ -111,8 +113,8 @@ namespace kernel::gr { int ni1, int ni2, int ni3, - const real_t& epsilon, - const unsigned short& niter, + real_t epsilon, + unsigned short niter, const boundaries_t& boundaries) : DB { DB } , DB0 { DB0 } @@ -213,7 +215,7 @@ namespace kernel::gr { Inline void UpdatePhi(T, const coord_t& xp, const vec_t& vp, - real_t& phi) const; + real_t phi) const; /** * @brief EM pusher (Boris) substep. @@ -273,7 +275,7 @@ namespace kernel::gr { * @param e interpolated e-field vector of size 3 [return]. * @param b interpolated b-field vector of size 3 [return]. 
*/ - Inline void interpolateFields(index_t& p, + Inline void interpolateFields(index_t p, vec_t& e, vec_t& b) const; @@ -298,7 +300,7 @@ namespace kernel::gr { } // Extra - Inline void boundaryConditions(index_t&) const; + Inline void boundaryConditions(index_t) const; }; /* -------------------------------------------------------------------------- */ @@ -492,7 +494,7 @@ namespace kernel::gr { Inline void Pusher_kernel::UpdatePhi(T, const coord_t& xp, const vec_t& vp, - real_t& phi) const { + real_t phi) const { if constexpr (D == Dim::_1D) { raise::KernelError(HERE, "UpdatePhi: 1D implementation called"); } else if constexpr (D == Dim::_2D) { @@ -511,7 +513,7 @@ namespace kernel::gr { } template - Inline void Pusher_kernel::interpolateFields(index_t& p, + Inline void Pusher_kernel::interpolateFields(index_t p, vec_t& e0, vec_t& b0) const { if constexpr (D == Dim::_1D) { @@ -743,7 +745,7 @@ namespace kernel::gr { // Boundary conditions template - Inline void Pusher_kernel::boundaryConditions(index_t& p) const { + Inline void Pusher_kernel::boundaryConditions(index_t p) const { if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { if (i1(p) < 0 && is_absorb_i1min) { tag(p) = ParticleTag::dead; diff --git a/src/kernels/particle_pusher_sr.hpp b/src/kernels/particle_pusher_sr.hpp index 9fc3582d4..3c2eb0320 100644 --- a/src/kernels/particle_pusher_sr.hpp +++ b/src/kernels/particle_pusher_sr.hpp @@ -51,7 +51,7 @@ namespace kernel::sr { enum CoolingTags_ { None = 0, Synchrotron = 1 << 0, - Compton = 1 << 1, + Compton = 1 << 1, }; } // namespace Cooling @@ -105,8 +105,8 @@ namespace kernel::sr { raise::ErrorIf(ExtForce, "External force not provided", HERE); } - Inline auto fx1(const spidx_t& sp, - const simtime_t& time, + Inline auto fx1(spidx_t sp, + simtime_t time, bool ext_force, const coord_t& x_Ph) const -> real_t { real_t f_x1 = ZERO; @@ -131,8 +131,8 @@ namespace kernel::sr { return f_x1; } - Inline auto fx2(const spidx_t& sp, - const simtime_t& time, + 
Inline auto fx2(spidx_t sp, + simtime_t time, bool ext_force, const coord_t& x_Ph) const -> real_t { real_t f_x2 = ZERO; @@ -157,8 +157,8 @@ namespace kernel::sr { return f_x2; } - Inline auto fx3(const spidx_t& sp, - const simtime_t& time, + Inline auto fx3(spidx_t sp, + simtime_t time, bool ext_force, const coord_t& x_Ph) const -> real_t { real_t f_x3 = ZERO; @@ -408,7 +408,7 @@ namespace kernel::sr { coeff_sync, coeff_comp) {} - Inline void synchrotronDrag(index_t& p, + Inline void synchrotronDrag(index_t p, vec_t& u_prime, const vec_t& e0, const vec_t& b0) const { @@ -459,9 +459,7 @@ namespace kernel::sr { ux3(p) += coeff_sync * (kappaR[2] - gamma_prime_sqr * u_prime[2] * chiR_sqr); } - Inline void inverseComptonDrag(index_t& p, - vec_t& u_prime - ) const { + Inline void inverseComptonDrag(index_t p, vec_t& u_prime) const { real_t gamma_prime_sqr = ONE / math::sqrt(ONE + NORM_SQR(u_prime[0], u_prime[1], u_prime[2])); @@ -591,7 +589,7 @@ namespace kernel::sr { posUpd(true, p, xp_Cd); } - Inline void posUpd(bool massive, index_t& p, coord_t& xp) const { + Inline void posUpd(bool massive, index_t p, coord_t& xp) const { // get cartesian velocity if constexpr (M::CoordType == Coord::Cart) { // i+di push for Cartesian basis @@ -682,7 +680,7 @@ namespace kernel::sr { * @param p, e0, b0 index & interpolated fields */ Inline void velUpd(bool with_gca, - index_t& p, + index_t p, vec_t& e0, vec_t& b0) const { if (with_gca) { @@ -797,7 +795,7 @@ namespace kernel::sr { } Inline void velUpd(bool, - index_t& p, + index_t p, vec_t& f0, vec_t& e0, vec_t& b0) const { @@ -839,7 +837,7 @@ namespace kernel::sr { } // Getters - Inline void getPrtlPos(index_t& p, coord_t& xp) const { + Inline void getPrtlPos(index_t p, coord_t& xp) const { if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { xp[0] = i_di_to_Xi(i1(p), dx1(p)); } @@ -855,7 +853,7 @@ namespace kernel::sr { } } - Inline void getInterpFlds(index_t& p, + Inline void getInterpFlds(index_t p, vec_t& e0, vec_t& 
b0) const { if constexpr (D == Dim::_1D) { @@ -1120,7 +1118,7 @@ namespace kernel::sr { } // Extra - Inline void boundaryConditions(index_t& p, coord_t& xp) const { + Inline void boundaryConditions(index_t p, coord_t& xp) const { if constexpr (D == Dim::_1D || D == Dim::_2D || D == Dim::_3D) { auto invert_vel = false; if (i1(p) < 0) { diff --git a/src/kernels/prtls_to_phys.hpp b/src/kernels/prtls_to_phys.hpp index 8ac9ba38c..678fb15ab 100644 --- a/src/kernels/prtls_to_phys.hpp +++ b/src/kernels/prtls_to_phys.hpp @@ -131,7 +131,7 @@ namespace kernel { } } - Inline void bufferX(const index_t& p_from, const index_t& p_to) const { + Inline void bufferX(index_t p_from, index_t p_to) const { if constexpr ((D == Dim::_1D) || (D == Dim::_2D) || (D == Dim::_3D)) { buff_x1(p_to) = metric.template convert<1, Crd::Cd, Crd::Ph>( static_cast(i1(p_from)) + static_cast(dx1(p_from))); @@ -148,7 +148,7 @@ namespace kernel { } } - Inline void bufferU(const index_t& p_from, const index_t& p_to) const { + Inline void bufferU(index_t p_from, index_t p_to) const { vec_t u_Phys { ZERO }; if constexpr (D == Dim::_1D) { if constexpr (M::CoordType == Coord::Cart) { @@ -206,7 +206,7 @@ namespace kernel { buff_ux3(p_to) = u_Phys[2]; } - Inline void bufferPlds(const index_t& p_from, const index_t& p_to) const { + Inline void bufferPlds(index_t p_from, index_t p_to) const { for (auto pr { 0u }; pr < buff_pldr.extent(1); ++pr) { buff_pldr(p_to, pr) = pld_r(p_from, pr); } diff --git a/src/kernels/tests/ext_force.cpp b/src/kernels/tests/ext_force.cpp index 12e3466cf..916f7b11e 100644 --- a/src/kernels/tests/ext_force.cpp +++ b/src/kernels/tests/ext_force.cpp @@ -53,21 +53,15 @@ struct Force { Force(real_t force) : force { force } {} - Inline auto fx1(const spidx_t&, - const simtime_t&, - const coord_t&) const -> real_t { + Inline auto fx1(spidx_t, simtime_t, const coord_t&) const -> real_t { return force * math::sin(ONE) * math::sin(ONE); } - Inline auto fx2(const spidx_t&, - const simtime_t&, - 
const coord_t&) const -> real_t { + Inline auto fx2(spidx_t, simtime_t, const coord_t&) const -> real_t { return force * math::sin(ONE) * math::cos(ONE); } - Inline auto fx3(const spidx_t&, - const simtime_t&, - const coord_t&) const -> real_t { + Inline auto fx3(spidx_t, simtime_t, const coord_t&) const -> real_t { return force * math::cos(ONE); } @@ -203,7 +197,7 @@ void testPusher(const std::vector& res) { (simtime_t)time, coeff, dt, nx1, nx2, nx3, boundaries, - ZERO, ZERO, ZERO)); + ZERO, ZERO, ZERO, ZERO)); auto i1_prev_ = Kokkos::create_mirror_view(i1_prev); auto i2_prev_ = Kokkos::create_mirror_view(i2_prev); diff --git a/src/kernels/tests/gca_pusher.cpp b/src/kernels/tests/gca_pusher.cpp index 5630de414..7a2e2d9a3 100644 --- a/src/kernels/tests/gca_pusher.cpp +++ b/src/kernels/tests/gca_pusher.cpp @@ -173,7 +173,7 @@ void testPusher(const std::vector& res) { ZERO, coeff, dt, nx1, nx2, nx3, boundaries, - (real_t)10000.0, ONE, ZERO)); + (real_t)10000.0, ONE, ZERO, ZERO)); auto ux1_ = Kokkos::create_mirror_view(ux1); auto ux2_ = Kokkos::create_mirror_view(ux2); diff --git a/src/kernels/tests/prtl_bc.cpp b/src/kernels/tests/prtl_bc.cpp index f7f8be43b..f75c90bc4 100644 --- a/src/kernels/tests/prtl_bc.cpp +++ b/src/kernels/tests/prtl_bc.cpp @@ -259,7 +259,7 @@ void testPeriodicBC(const std::vector& res, time, coeff, dt, nx1, nx2, nx3, boundaries, - ZERO, ZERO, ZERO)); + ZERO, ZERO, ZERO, ZERO)); // clang-format on auto i1_ = Kokkos::create_mirror_view(i1); auto i2_ = Kokkos::create_mirror_view(i2); diff --git a/src/kernels/tests/pusher.cpp b/src/kernels/tests/pusher.cpp index 8496b592d..ddbc1be8a 100644 --- a/src/kernels/tests/pusher.cpp +++ b/src/kernels/tests/pusher.cpp @@ -175,7 +175,7 @@ void testPusher(const std::vector& res) { ZERO, coeff, dt, nx1, nx2, nx3, boundaries, - ZERO, ZERO, ZERO)); + ZERO, ZERO, ZERO, ZERO)); Kokkos::parallel_for( "pusher", @@ -194,7 +194,7 @@ void testPusher(const std::vector& res) { ZERO, coeff, dt, nx1, nx2, nx3, boundaries, - 
ZERO, ZERO, ZERO)); + ZERO, ZERO, ZERO, ZERO)); auto i1_prev_ = Kokkos::create_mirror_view(i1_prev); auto i2_prev_ = Kokkos::create_mirror_view(i2_prev); diff --git a/src/metrics/kerr_schild.h b/src/metrics/kerr_schild.h index e8626fb79..14441f995 100644 --- a/src/metrics/kerr_schild.h +++ b/src/metrics/kerr_schild.h @@ -41,19 +41,19 @@ namespace metric { const real_t dr, dtheta, dphi; const real_t dr_inv, dtheta_inv, dphi_inv; - Inline auto Delta(const real_t& r) const -> real_t { + Inline auto Delta(real_t r) const -> real_t { return SQR(r) - TWO * r + SQR(a); } - Inline auto Sigma(const real_t& r, const real_t& theta) const -> real_t { + Inline auto Sigma(real_t r, real_t theta) const -> real_t { return SQR(r) + SQR(a) * SQR(math::cos(theta)); } - Inline auto A(const real_t& r, const real_t& theta) const -> real_t { + Inline auto A(real_t r, real_t theta) const -> real_t { return SQR(SQR(r) + SQR(a)) - SQR(a) * Delta(r) * SQR(math::sin(theta)); } - Inline auto z(const real_t& r, const real_t& theta) const -> real_t { + Inline auto z(real_t r, real_t theta) const -> real_t { return TWO * r / Sigma(r, theta); } @@ -92,17 +92,17 @@ namespace metric { ~KerrSchild() = default; [[nodiscard]] - Inline auto spin() const -> const real_t& { + Inline auto spin() const -> real_t { return a; } [[nodiscard]] - Inline auto rhorizon() const -> const real_t& { + Inline auto rhorizon() const -> real_t { return rh_; } [[nodiscard]] - Inline auto rg() const -> const real_t& { + Inline auto rg() const -> real_t { return rg_; } @@ -340,7 +340,7 @@ namespace metric { * dtheta derivative of Sigma * @param x coordinate array in code units */ - Inline auto dt_Sigma(const real_t& theta) const -> real_t { + Inline auto dt_Sigma(real_t theta) const -> real_t { const real_t dt_Sigma { -TWO * SQR(a) * math::sin(theta) * math::cos(theta) * dtheta }; if (cmp::AlmostZero(dt_Sigma)) { @@ -354,7 +354,7 @@ namespace metric { * dtheta derivative of A * @param x coordinate array in code units */ - 
Inline auto dt_A(const real_t& r, const real_t& theta) const -> real_t { + Inline auto dt_A(real_t r, real_t theta) const -> real_t { const real_t dt_A { -TWO * SQR(a) * math::sin(theta) * math::cos(theta) * Delta(r) * dtheta }; if (cmp::AlmostZero(dt_A)) { @@ -444,7 +444,7 @@ namespace metric { * differential area at the pole (used in axisymmetric solvers) * @param x1 radial coordinate along the axis (code units) */ - Inline auto polar_area(const real_t& x1) const -> real_t { + Inline auto polar_area(real_t x1) const -> real_t { return dr * (SQR(x1 * dr + x1_min) + SQR(a)) * math::sqrt(ONE + TWO * (x1 * dr + x1_min) / (SQR(x1 * dr + x1_min) + SQR(a))) * @@ -455,7 +455,7 @@ namespace metric { * component-wise coordinate conversions */ template - Inline auto convert(const real_t& x_in) const -> real_t { + Inline auto convert(real_t x_in) const -> real_t { static_assert(in != out, "Invalid coordinate conversion"); static_assert(i > 0 && i <= 3, "Invalid index i"); static_assert((in == Crd::Cd && (out == Crd::Sph || out == Crd::Ph)) || diff --git a/src/metrics/kerr_schild_0.h b/src/metrics/kerr_schild_0.h index 142e88b7a..66834a4c7 100644 --- a/src/metrics/kerr_schild_0.h +++ b/src/metrics/kerr_schild_0.h @@ -73,17 +73,17 @@ namespace metric { ~KerrSchild0() = default; [[nodiscard]] - Inline auto spin() const -> const real_t& { + Inline auto spin() const -> real_t { return a; } [[nodiscard]] - Inline auto rhorizon() const -> const real_t& { + Inline auto rhorizon() const -> real_t { return rh_; } [[nodiscard]] - Inline auto rg() const -> const real_t& { + Inline auto rg() const -> real_t { return rg_; } @@ -325,7 +325,7 @@ namespace metric { * differential area at the pole (used in axisymmetric solvers) * @param x1 radial coordinate along the axis (code units) */ - Inline auto polar_area(const real_t& x1) const -> real_t { + Inline auto polar_area(real_t x1) const -> real_t { return dr * SQR(x1 * dr + x1_min) * (ONE - math::cos(HALF * dtheta)); } @@ -333,7 +333,7 @@ 
namespace metric { * component-wise coordinate conversions */ template - Inline auto convert(const real_t& x_in) const -> real_t { + Inline auto convert(real_t x_in) const -> real_t { static_assert(in != out, "Invalid coordinate conversion"); static_assert(i > 0 && i <= 3, "Invalid index i"); static_assert((in == Crd::Cd && (out == Crd::Sph || out == Crd::Ph)) || diff --git a/src/metrics/minkowski.h b/src/metrics/minkowski.h index cf4aad2b6..47430ccf3 100644 --- a/src/metrics/minkowski.h +++ b/src/metrics/minkowski.h @@ -152,7 +152,7 @@ namespace metric { * component-wise coordinate conversions */ template - Inline auto convert(const real_t& x_in) const -> real_t { + Inline auto convert(real_t x_in) const -> real_t { static_assert(in != out, "Invalid coordinate conversion"); static_assert(i > 0 && i <= static_cast(D), "Invalid index i"); static_assert((in == Crd::Cd && (out == Crd::XYZ || out == Crd::Ph)) || @@ -258,7 +258,7 @@ namespace metric { * @note tetrad/cart <-> cntrv <-> cov */ template - Inline auto transform(const coord_t& xi, const real_t& v_in) const -> real_t { + Inline auto transform(const coord_t& xi, real_t v_in) const -> real_t { static_assert(i > 0 && i <= 3, "Invalid index i"); static_assert(in != out, "Invalid vector transformation"); if constexpr (i > static_cast(D)) { diff --git a/src/metrics/qkerr_schild.h b/src/metrics/qkerr_schild.h index f27a08790..9b608f35f 100644 --- a/src/metrics/qkerr_schild.h +++ b/src/metrics/qkerr_schild.h @@ -40,19 +40,19 @@ namespace metric { const real_t dchi, deta, dphi; const real_t dchi_inv, deta_inv, dphi_inv; - Inline auto Delta(const real_t& r) const -> real_t { + Inline auto Delta(real_t r) const -> real_t { return SQR(r) - TWO * r + SQR(a); } - Inline auto Sigma(const real_t& r, const real_t& theta) const -> real_t { + Inline auto Sigma(real_t r, real_t theta) const -> real_t { return SQR(r) + SQR(a) * SQR(math::cos(theta)); } - Inline auto A(const real_t& r, const real_t& theta) const -> real_t { + 
Inline auto A(real_t r, real_t theta) const -> real_t { return SQR(SQR(r) + SQR(a)) - SQR(a) * Delta(r) * SQR(math::sin(theta)); } - Inline auto z(const real_t& r, const real_t& theta) const -> real_t { + Inline auto z(real_t r, real_t theta) const -> real_t { return TWO * r / Sigma(r, theta); } @@ -96,17 +96,17 @@ namespace metric { ~QKerrSchild() = default; [[nodiscard]] - Inline auto spin() const -> const real_t& { + Inline auto spin() const -> real_t { return a; } [[nodiscard]] - Inline auto rhorizon() const -> const real_t& { + Inline auto rhorizon() const -> real_t { return rh_; } [[nodiscard]] - Inline auto rg() const -> const real_t& { + Inline auto rg() const -> real_t { return rg_; } @@ -388,7 +388,7 @@ namespace metric { * dtheta derivative of Sigma * @param x coordinate array in code units */ - Inline auto dt_Sigma(const real_t& eta) const -> real_t { + Inline auto dt_Sigma(real_t eta) const -> real_t { const real_t theta { eta2theta(eta) }; const real_t dt_Sigma { -TWO * SQR(a) * math::sin(theta) * math::cos(theta) * dx_dt(eta) }; @@ -403,7 +403,7 @@ namespace metric { * dtheta derivative of A * @param x coordinate array in code units */ - Inline auto dt_A(const real_t& r, const real_t& eta) const -> real_t { + Inline auto dt_A(real_t r, real_t eta) const -> real_t { const real_t theta { eta2theta(eta) }; const real_t dt_A { -TWO * SQR(a) * math::sin(theta) * math::cos(theta) * Delta(r) * dx_dt(eta) }; @@ -504,7 +504,7 @@ namespace metric { * @note approximate solution for the polar area * @param x1 radial coordinate along the axis (code units) */ - Inline auto polar_area(const real_t& x1) const -> real_t { + Inline auto polar_area(real_t x1) const -> real_t { if constexpr (D != Dim::_1D) { return dchi * math::exp(x1 * dchi + chi_min) * (SQR(r0 + math::exp(x1 * dchi + chi_min)) + SQR(a)) * @@ -519,7 +519,7 @@ namespace metric { * component-wise coordinate conversions */ template - Inline auto convert(const real_t& x_in) const -> real_t { + Inline auto 
convert(real_t x_in) const -> real_t { static_assert(in != out, "Invalid coordinate conversion"); static_assert(i > 0 && i <= 3, "Invalid index i"); static_assert((in == Crd::Cd && (out == Crd::Sph || out == Crd::Ph)) || @@ -654,7 +654,7 @@ namespace metric { /** * @brief d(th) / d(eta) for a given eta */ - Inline auto dtheta_deta(const real_t& eta) const -> real_t { + Inline auto dtheta_deta(real_t eta) const -> real_t { if (cmp::AlmostZero(h0)) { return ONE; } else { @@ -668,7 +668,7 @@ namespace metric { /** * @brief quasi-spherical eta to spherical theta */ - Inline auto eta2theta(const real_t& eta) const -> real_t { + Inline auto eta2theta(real_t eta) const -> real_t { if (cmp::AlmostZero(h0)) { return eta; } else { @@ -682,7 +682,7 @@ namespace metric { /** * @brief quasi-spherical eta to spherical theta */ - Inline auto dx_dt(const real_t& eta) const -> real_t { + Inline auto dx_dt(real_t eta) const -> real_t { if (cmp::AlmostZero(h0)) { return deta; } else { @@ -697,7 +697,7 @@ namespace metric { /** * @brief spherical theta to quasi-spherical eta */ - Inline auto theta2eta(const real_t& theta) const -> real_t { + Inline auto theta2eta(real_t theta) const -> real_t { if (cmp::AlmostZero(h0)) { return theta; } else { diff --git a/src/metrics/qspherical.h b/src/metrics/qspherical.h index 4acfda442..09340bd8a 100644 --- a/src/metrics/qspherical.h +++ b/src/metrics/qspherical.h @@ -197,7 +197,7 @@ namespace metric { * differential area at the pole (used in axisymmetric solvers) * @param x1 radial coordinate along the axis (code units) */ - Inline auto polar_area(const real_t& x1) const -> real_t { + Inline auto polar_area(real_t x1) const -> real_t { if constexpr (D != Dim::_1D) { const real_t exp_chi { math::exp(x1 * dchi + chi_min) }; if (small_angle) { @@ -216,7 +216,7 @@ namespace metric { * component-wise coordinate conversions */ template - Inline auto convert(const real_t& x_in) const -> real_t { + Inline auto convert(real_t x_in) const -> real_t { 
static_assert(in != out, "Invalid coordinate conversion"); static_assert(i > 0 && i <= 3, "Invalid index i"); static_assert((in == Crd::Cd && (out == Crd::Sph || out == Crd::Ph)) || @@ -307,7 +307,7 @@ namespace metric { * @note tetrad/sph <-> cntrv <-> cov */ template - Inline auto transform(const coord_t& xi, const real_t& v_in) const -> real_t { + Inline auto transform(const coord_t& xi, real_t v_in) const -> real_t { static_assert(i > 0 && i <= 3, "Invalid index i"); static_assert(in != out, "Invalid vector transformation"); if constexpr ((in == Idx::T && out == Idx::Sph) || @@ -440,7 +440,7 @@ namespace metric { /** * @brief Compute d(th) / d(eta) for a given eta. */ - Inline auto dtheta_deta(const real_t& eta) const -> real_t { + Inline auto dtheta_deta(real_t eta) const -> real_t { if (cmp::AlmostZero(h)) { return ONE; } else { @@ -453,7 +453,7 @@ namespace metric { /** * @brief Convert quasi-spherical eta to spherical theta. */ - Inline auto eta2theta(const real_t& eta) const -> real_t { + Inline auto eta2theta(real_t eta) const -> real_t { if (cmp::AlmostZero(h)) { return eta; } else { @@ -465,7 +465,7 @@ namespace metric { /** * @brief Convert spherical theta to quasi-spherical eta. 
*/ - Inline auto theta2eta(const real_t& theta) const -> real_t { + Inline auto theta2eta(real_t theta) const -> real_t { if (cmp::AlmostZero(h)) { return theta; } else { diff --git a/src/metrics/spherical.h b/src/metrics/spherical.h index 388d3710e..50367c765 100644 --- a/src/metrics/spherical.h +++ b/src/metrics/spherical.h @@ -170,7 +170,7 @@ namespace metric { * @param x1 radial coordinate along the axis (code units) * @note uses small-angle approximation when the resolution is too high */ - Inline auto polar_area(const real_t& x1) const -> real_t { + Inline auto polar_area(real_t x1) const -> real_t { if (small_angle) { return dr * SQR(x1 * dr + x1_min) * (static_cast(48) - SQR(dtheta)) * SQR(dtheta) / @@ -184,7 +184,7 @@ namespace metric { * component-wise coordinate conversions */ template - Inline auto convert(const real_t& x_in) const -> real_t { + Inline auto convert(real_t x_in) const -> real_t { static_assert(in != out, "Invalid coordinate conversion"); static_assert(i > 0 && i <= 3, "Invalid index i"); static_assert((in == Crd::Cd && (out == Crd::Sph || out == Crd::Ph)) || @@ -275,7 +275,7 @@ namespace metric { * @note tetrad/sph <-> cntrv <-> cov */ template - Inline auto transform(const coord_t& xi, const real_t& v_in) const -> real_t { + Inline auto transform(const coord_t& xi, real_t v_in) const -> real_t { static_assert(i > 0 && i <= 3, "Invalid index i"); static_assert(in != out, "Invalid vector transformation"); if constexpr ((in == Idx::T && out == Idx::Sph) || From cdad86690354518fb64229a4ba264dde51b9c591 Mon Sep 17 00:00:00 2001 From: haykh Date: Mon, 1 Dec 2025 19:34:21 -0500 Subject: [PATCH 120/154] bug in nonuniforminject + fractional ppc bug --- src/engine_registry.h | 51 ++++++++++++ src/engines/engine_init.cpp | 18 +---- src/engines/engine_printer.cpp | 19 ++--- src/engines/engine_run.cpp | 18 +---- src/entity.cpp | 143 ++++++++++++++++----------------- src/kernels/injectors.hpp | 23 ++++-- 6 files changed, 149 insertions(+), 123 
deletions(-) create mode 100644 src/engine_registry.h diff --git a/src/engine_registry.h b/src/engine_registry.h new file mode 100644 index 000000000..bba6c2ecb --- /dev/null +++ b/src/engine_registry.h @@ -0,0 +1,51 @@ +#ifndef ENGINE_REGISTRY_H +#define ENGINE_REGISTRY_H + +#include "enums.h" + +#include + +#include "metrics/kerr_schild.h" +#include "metrics/kerr_schild_0.h" +#include "metrics/minkowski.h" +#include "metrics/qkerr_schild.h" +#include "metrics/qspherical.h" +#include "metrics/spherical.h" + +#include "engines/grpic.hpp" +#include "engines/srpic.hpp" + +namespace ntt { + + // Central registry for supported engine/metric/dimension triplets. + // Add new combinations here to enable runtime dispatch and explicit instantiations. +#define NTT_ENGINE_METRIC_DIMENSION_REGISTRY(MACRO) \ + MACRO(ntt::SRPICEngine, metric::Minkowski, Dim::_1D) \ + MACRO(ntt::SRPICEngine, metric::Minkowski, Dim::_2D) \ + MACRO(ntt::SRPICEngine, metric::Minkowski, Dim::_3D) \ + MACRO(ntt::SRPICEngine, metric::Spherical, Dim::_2D) \ + MACRO(ntt::SRPICEngine, metric::QSpherical, Dim::_2D) \ + MACRO(ntt::GRPICEngine, metric::KerrSchild, Dim::_2D) \ + MACRO(ntt::GRPICEngine, metric::KerrSchild0, Dim::_2D) \ + MACRO(ntt::GRPICEngine, metric::QKerrSchild, Dim::_2D) + + namespace detail { + template