Threaded grids, and avoiding duplication #89

Merged (13 commits) on Jul 20, 2017
39 changes: 19 additions & 20 deletions fbpic/fields/numba_methods.py
@@ -5,21 +5,18 @@
 This file is part of the Fourier-Bessel Particle-In-Cell code (FB-PIC)
 It defines the optimized fields methods that use numba on a CPU
 """
-import numba
 from scipy.constants import c, epsilon_0, mu_0
 c2 = c**2
+from fbpic.threading_utils import njit_parallel, prange

-@numba.jit('void(complex128[:,:], complex128[:,:], \
-    complex128[:,:], complex128[:,:], complex128[:,:], \
-    float64[:,:], float64[:,:], float64[:,:], \
-    float64, int32, int32)')
+@njit_parallel
 def numba_correct_currents_standard( rho_prev, rho_next, Jp, Jm, Jz,
                                      kz, kr, inv_k2, inv_dt, Nz, Nr ):
     """
     Correct the currents in spectral space, using the standard psatd
     """
-    # Loop over the 2D grid
-    for iz in range(Nz):
+    # Loop over the 2D grid (parallel in z, if threading is installed)
+    for iz in prange(Nz):
         for ir in range(Nr):

             # Calculate the intermediate variable F
@@ -33,13 +30,9 @@ def numba_correct_currents_standard( rho_prev, rho_next, Jp, Jm, Jz,
             Jm[iz, ir] += -0.5 * kr[iz, ir] * F
             Jz[iz, ir] += -1.j * kz[iz, ir] * F
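Note: the lines that compute the intermediate variable `F` are collapsed in this diff, so the first relation below is inferred from the visible updates and from the standard spectral current correction; only the `Jp`/`Jm`/`Jz` updates are literally in the source. With $k^2 = k_z^2 + k_r^2$ (whose inverse is presumably `inv_k2`):

$$
\hat F = -\frac{1}{k^2}\left[\frac{\hat\rho^{\,n+1}-\hat\rho^{\,n}}{\Delta t} + i\,k_z \hat J_z + k_r\big(\hat J_+ - \hat J_-\big)\right],
\qquad
\hat J_\pm \mathrel{+}= \pm\frac{k_r}{2}\,\hat F,
\qquad
\hat J_z \mathrel{+}= -\,i\,k_z\,\hat F
$$

Substituting the updates back in shows that the corrected currents satisfy $(\hat\rho^{\,n+1}-\hat\rho^{\,n})/\Delta t + i k_z \hat J_z + k_r(\hat J_+ - \hat J_-) = 0$, i.e. the discretized continuity equation in spectral space.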

-@numba.jit('void(complex128[:,:], complex128[:,:], complex128[:,:], \
-    complex128[:,:], complex128[:,:], complex128[:,:], \
-    complex128[:,:], complex128[:,:], complex128[:,:], \
-    complex128[:,:], complex128[:,:], \
-    float64[:,:], float64[:,:], float64[:,:], \
-    float64[:,:], float64[:,:], float64[:,:], float64[:,:], float64, \
-    int8, int32, int32)')
+    return
+
+@njit_parallel
 def numba_push_eb_standard( Ep, Em, Ez, Bp, Bm, Bz, Jp, Jm, Jz,
                             rho_prev, rho_next,
                             rho_prev_coef, rho_next_coef, j_coef,
@@ -50,8 +43,8 @@ def numba_push_eb_standard( Ep, Em, Ez, Bp, Bm, Bz, Jp, Jm, Jz,

     See the documentation of SpectralGrid.push_eb_with
     """
-    # Loop over the 2D grid
-    for iz in range(Nz):
+    # Loop over the 2D grid (parallel in z, if threading is installed)
+    for iz in prange(Nz):
         for ir in range(Nr):

             # Save the electric fields, since they are needed for the B push
@@ -106,7 +99,9 @@ def numba_push_eb_standard( Ep, Em, Ez, Bp, Bm, Bz, Jp, Jm, Jz,
                 + j_coef[iz, ir]*( 1.j*kr[iz, ir]*Jp[iz, ir] \
                 + 1.j*kr[iz, ir]*Jm[iz, ir] )

-@numba.jit
+    return
+
+@njit_parallel
 def numba_correct_currents_comoving( rho_prev, rho_next, Jp, Jm, Jz,
                                      kz, kr, inv_k2,
                                      j_corr_coef, T_eb, T_cc,
@@ -115,8 +110,8 @@ def numba_correct_currents_comoving( rho_prev, rho_next, Jp, Jm, Jz,
     Correct the currents in spectral space, using the assumption
     of comoving currents
     """
-    # Loop over the 2D grid
-    for iz in range(Nz):
+    # Loop over the 2D grid (parallel in z, if threading is installed)
+    for iz in prange(Nz):
         for ir in range(Nr):

             # Calculate the intermediate variable F
@@ -130,7 +125,9 @@ def numba_correct_currents_comoving( rho_prev, rho_next, Jp, Jm, Jz,
             Jm[iz, ir] += -0.5 * kr[iz, ir] * F
             Jz[iz, ir] += -1.j * kz[iz, ir] * F

-@numba.jit
+    return
+
+@njit_parallel
 def numba_push_eb_comoving( Ep, Em, Ez, Bp, Bm, Bz, Jp, Jm, Jz,
                             rho_prev, rho_next,
                             rho_prev_coef, rho_next_coef, j_coef,
@@ -207,3 +204,5 @@ def numba_push_eb_comoving( Ep, Em, Ez, Bp, Bm, Bz, Jp, Jm, Jz,
                 + 1.j*kr[iz, ir]*Em_old ) \
                 + j_coef[iz, ir]*( 1.j*kr[iz, ir]*Jp[iz, ir] \
                 + 1.j*kr[iz, ir]*Jm[iz, ir] )
+
+    return
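The new `fbpic/threading_utils` module itself is not part of this diff. Judging from the names imported above (`njit_parallel`, `prange`, and `threading_enabled` in `fbpic/main.py` below), it presumably wraps numba's parallel compilation with a serial fallback. A minimal sketch of such a module, assuming only standard numba APIs (this is not the actual FBPIC source):

```python
# Hypothetical sketch of fbpic/threading_utils.py: provide parallel
# compilation when the installed numba supports it, and fall back to
# serial compilation otherwise.
import numba

try:
    # numba.prange only exists in versions of numba that support
    # parallel compilation; older versions raise AttributeError here
    prange = numba.prange
    # Decorator that compiles a function with multi-threading enabled
    njit_parallel = numba.njit( parallel=True )
    threading_enabled = True
except AttributeError:
    # Serial fallback: prange degrades to the built-in range, so the
    # very same kernels compile and run on a single thread
    prange = range
    njit_parallel = numba.njit
    threading_enabled = False
```

With this pattern, the kernels above are written once and run either threaded or serial, which is the "avoiding duplication" part of the PR title.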
29 changes: 16 additions & 13 deletions fbpic/fields/spectral_transform/spectral_transformer.py
@@ -9,6 +9,7 @@
 from .hankel import DHT
 from .fourier import FFT

+from .threading_methods import numba_rt_to_pm, numba_pm_to_rt
 # Check if CUDA is available, then import CUDA functions
 from fbpic.cuda_utils import cuda_installed
 if cuda_installed:
@@ -139,12 +140,13 @@ def spect2interp_vect( self, spect_array_p, spect_array_m,
                 self.spect_buffer_r, self.spect_buffer_t )
         else :
             # Combine them on the CPU
-            # (It is important to write the affectation in the following way,
-            # since self.spect_buffer_p and self.spect_buffer_r actually point
-            # to the same object, for memory economy)
-            self.spect_buffer_r[:,:], self.spect_buffer_t[:,:] = \
-                ( self.spect_buffer_p + self.spect_buffer_m), \
-                1.j*( self.spect_buffer_p - self.spect_buffer_m)
+            # (self.spect_buffer_r and self.spect_buffer_t are
+            # passed in the following line, in order to make things
+            # explicit, but they actually point to the same object
+            # as self.spect_buffer_p, self.spect_buffer_m,
+            # for economy of memory)
+            numba_pm_to_rt( self.spect_buffer_p, self.spect_buffer_m,
+                            self.spect_buffer_r, self.spect_buffer_t )

         # Finally perform the FFT (along axis 0, which corresponds to z)
         self.fft.inverse_transform( self.spect_buffer_r, interp_array_r )
@@ -205,13 +207,14 @@ def interp2spect_vect( self, interp_array_r, interp_array_t,
                 self.spect_buffer_r, self.spect_buffer_t,
                 self.spect_buffer_p, self.spect_buffer_m )
         else :
-            # Combine them on the CPU
-            # (It is important to write the affectation in the following way,
-            # since self.spect_buffer_p and self.spect_buffer_r actually point
-            # to the same object, for memory economy.)
-            self.spect_buffer_p[:,:], self.spect_buffer_m[:,:] = \
-                0.5*( self.spect_buffer_r - 1.j*self.spect_buffer_t ), \
-                0.5*( self.spect_buffer_r + 1.j*self.spect_buffer_t )
+            # Combine them on the CPU
+            # (self.spect_buffer_p and self.spect_buffer_m are
+            # passed in the following line, in order to make things
+            # explicit, but they actually point to the same object
+            # as self.spect_buffer_r, self.spect_buffer_t,
+            # for economy of memory)
+            numba_rt_to_pm( self.spect_buffer_r, self.spect_buffer_t,
+                            self.spect_buffer_p, self.spect_buffer_m )

         # Perform the inverse DHT (along axis -1, which corresponds to r)
         self.dhtp.transform( self.spect_buffer_p, spect_array_p )
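A note on the buffer aliasing that these comments describe: the old NumPy expressions were safe despite the aliasing, because NumPy fully evaluates the right-hand side before assigning. An explicit element-wise loop has no such protection, so each value must be copied into a local variable before either output array is written, which is what the new kernels do. A small self-contained illustration of that pattern (hypothetical buffer names, not FBPIC code):

```python
import numpy as np

# Two pairs of logical buffers that deliberately share memory,
# like spect_buffer_p/spect_buffer_r in the transformer
buffer_p = buffer_r = np.array( [[1.+2.j, 3.-1.j]] )
buffer_m = buffer_t = np.array( [[0.5+0.j, 2.+2.j]] )

for iz in range( buffer_p.shape[0] ):
    for ir in range( buffer_p.shape[1] ):
        # Read both aliased values *before* overwriting either array
        value_p = buffer_p[iz, ir]
        value_m = buffer_m[iz, ir]
        buffer_r[iz, ir] = value_p + value_m          # clobbers buffer_p
        buffer_t[iz, ir] = 1.j*( value_p - value_m )  # clobbers buffer_m
```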
58 changes: 58 additions & 0 deletions fbpic/fields/spectral_transform/threading_methods.py
@@ -0,0 +1,58 @@
+# Copyright 2016, FBPIC contributors
+# Authors: Remi Lehe, Manuel Kirchen
+# License: 3-Clause-BSD-LBNL
+"""
+This file is part of the Fourier-Bessel Particle-In-Cell code (FB-PIC)
+It defines a set of functions that are useful when converting the
+fields from the interpolation grid to the spectral grid and vice-versa
+"""
+from fbpic.threading_utils import prange, njit_parallel
+
+# ----------------------------------------------------
+# Functions that combine components in spectral space
+# ----------------------------------------------------
+
+@njit_parallel
+def numba_rt_to_pm( buffer_r, buffer_t, buffer_p, buffer_m ) :
+    """
+    Combine the arrays buffer_r and buffer_t to produce the
+    arrays buffer_p and buffer_m, according to the rules of
+    the Fourier-Hankel decomposition (see associated paper)
+    """
+    Nz, Nr = buffer_r.shape
+
+    # Loop over the 2D grid (parallel in z, if threading is installed)
+    for iz in prange(Nz):
+        for ir in range(Nr):
+
+            # Use intermediate variables, as the arrays
+            # buffer_r and buffer_t may actually point to the same
+            # object as buffer_p and buffer_m, for economy of memory
+            value_r = buffer_r[iz, ir]
+            value_t = buffer_t[iz, ir]
+            # Combine the values
+            buffer_p[iz, ir] = 0.5*( value_r - 1.j*value_t )
+            buffer_m[iz, ir] = 0.5*( value_r + 1.j*value_t )
+
+
+@njit_parallel
+def numba_pm_to_rt( buffer_p, buffer_m, buffer_r, buffer_t ) :
+    """
+    Combine the arrays buffer_p and buffer_m to produce the
+    arrays buffer_r and buffer_t, according to the rules of
+    the Fourier-Hankel decomposition (see associated paper)
+    """
+    Nz, Nr = buffer_p.shape
+
+    # Loop over the 2D grid (parallel in z, if threading is installed)
+    for iz in prange(Nz):
+        for ir in range(Nr):
+
+            # Use intermediate variables, as the arrays
+            # buffer_p and buffer_m may actually point to the same
+            # object as buffer_r and buffer_t, for economy of memory
+            value_p = buffer_p[iz, ir]
+            value_m = buffer_m[iz, ir]
+            # Combine the values
+            buffer_r[iz, ir] = ( value_p + value_m )
+            buffer_t[iz, ir] = 1.j*( value_p - value_m )
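Assuming the module path from this diff, the two new kernels can be sanity-checked against each other: since $p = (r - i\,t)/2$ and $m = (r + i\,t)/2$, while $r = p + m$ and $t = i\,(p - m)$, a round trip should be the identity:

```python
import numpy as np
from fbpic.fields.spectral_transform.threading_methods import (
    numba_rt_to_pm, numba_pm_to_rt )

# Random complex (Nz, Nr) arrays standing in for spectral buffers
Nz, Nr = 8, 4
rng = np.random.RandomState( 0 )
buffer_r = rng.randn( Nz, Nr ) + 1.j*rng.randn( Nz, Nr )
buffer_t = rng.randn( Nz, Nr ) + 1.j*rng.randn( Nz, Nr )

# Separate (non-aliased) outputs here, to keep the originals intact
buffer_p, buffer_m = np.empty_like( buffer_r ), np.empty_like( buffer_r )
numba_rt_to_pm( buffer_r, buffer_t, buffer_p, buffer_m )

# Convert back and check that (r, t) is recovered
r_back, t_back = np.empty_like( buffer_r ), np.empty_like( buffer_r )
numba_pm_to_rt( buffer_p, buffer_m, r_back, t_back )
assert np.allclose( r_back, buffer_r ) and np.allclose( t_back, buffer_t )
```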
33 changes: 9 additions & 24 deletions fbpic/main.py
@@ -11,6 +11,8 @@
 # as it sets the cuda context)
 from mpi4py import MPI
 import numba
+# Check if threading is available
+from .threading_utils import threading_enabled
 # Check if CUDA is available, then import CUDA functions
 from .cuda_utils import cuda_installed
 if cuda_installed:
@@ -43,9 +45,8 @@ def __init__(self, Nz, zmax, Nr, rmax, Nm, dt, p_zmin, p_zmax,
                  p_rmin, p_rmax, p_nz, p_nr, p_nt, n_e, zmin=0.,
                  n_order=-1, dens_func=None, filter_currents=True,
                  v_comoving=None, use_galilean=False, initialize_ions=False,
-                 use_cuda=False, use_threading=True, nthreads=None,
-                 n_guard=None, n_damp=30, exchange_period=None,
-                 boundaries='periodic', gamma_boost=None,
+                 use_cuda=False, n_guard=None, n_damp=30, exchange_period=None,
+                 boundaries='periodic', gamma_boost=None,
                  use_all_mpi_ranks=True, particle_shape='linear' ):
         """
         Initializes a simulation, by creating the following structures:
@@ -132,12 +133,6 @@ def dens_func( z, r ) ...

         use_cuda: bool, optional
             Whether to use CUDA (GPU) acceleration
-        use_threading : bool, optional
-            Whether to use multi-threading on the CPU.
-        nthreads: int, optional
-            Number of CPU multi-threading threads used (if use_threading
-            is set). If nthreads is set to None, the number of threads
-            is automatically determined.

         n_guard: int, optional
             Number of guard cells to use at the left and right of
@@ -199,16 +194,8 @@ def dens_func( z, r ) ...
             print('*** Performing the simulation on CPU.')
             self.use_cuda = False
         # CPU multi-threading
-        self.use_threading = use_threading
-        if self.use_threading:
-            # Define number of threads used
-            if nthreads is not None:
-                # Set user-defined number of threads
-                self.nthreads = nthreads
-                numba.config.NUMBA_NUM_THREADS = self.nthreads
-            else:
-                # Automatically take numba preset for number of threads
-                self.nthreads = numba.config.NUMBA_NUM_THREADS
+        self.use_threading = threading_enabled

         # Register the comoving parameters
         self.v_comoving = v_comoving
         self.use_galilean = use_galilean
@@ -254,16 +241,14 @@ def dens_func( z, r ) ...
                       zmax=p_zmax, Npr=Npr, rmin=p_rmin, rmax=p_rmax,
                       Nptheta=p_nt, dt=dt, dens_func=dens_func, uz_m=uz_m,
                       grid_shape=grid_shape, particle_shape=particle_shape,
-                      use_cuda=self.use_cuda,
-                      use_threading=self.use_threading) ]
+                      use_cuda=self.use_cuda ) ]
         if initialize_ions :
             self.ptcl.append(
                 Particles(q=e, m=m_p, n=n_e, Npz=Npz, zmin=p_zmin,
                       zmax=p_zmax, Npr=Npr, rmin=p_rmin, rmax=p_rmax,
                       Nptheta=p_nt, dt=dt, dens_func=dens_func, uz_m=uz_m,
                       grid_shape=grid_shape, particle_shape=particle_shape,
-                      use_cuda=self.use_cuda,
-                      use_threading=self.use_threading) )
+                      use_cuda=self.use_cuda ) )

         # Register the number of particles per cell along z, and dt
         # (Necessary for the moving window)
@@ -620,7 +605,7 @@ def print_simulation_setup( comm, use_cuda, use_threading ):
     if use_threading and not use_cuda:
         message += " (%d threads per proc)" %numba.config.NUMBA_NUM_THREADS
     message += ".\n"

     print( message )

 def adapt_to_grid( x, p_xmin, p_xmax, p_nx, ncells_empty=0 ):
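With `use_threading` and `nthreads` gone from `Simulation.__init__`, the thread count is now taken from numba's own configuration. Assuming numba's standard `NUMBA_NUM_THREADS` mechanism (which this PR already reads via `numba.config.NUMBA_NUM_THREADS` above, and does not add), the number of threads can still be controlled from the user script:

```python
import os
# Must be set before numba is first imported (i.e. before importing
# fbpic), since numba reads NUMBA_NUM_THREADS at import time
os.environ['NUMBA_NUM_THREADS'] = '8'

from fbpic.main import Simulation
# njit_parallel kernels now run with 8 threads (when threading is enabled)
```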