Skip to content

Commit

Permalink
Merge pull request #80 from fbpic/cpuprange
Browse files Browse the repository at this point in the history
Implementation of CPU multi-threading
  • Loading branch information
RemiLehe committed Jul 22, 2017
2 parents 5897555 + 6204585 commit 68d4554
Show file tree
Hide file tree
Showing 27 changed files with 4,880 additions and 3,236 deletions.
128 changes: 56 additions & 72 deletions fbpic/boundaries/moving_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from scipy.constants import c
from fbpic.particles import Particles
from fbpic.lpa_utils.boosted_frame import BoostConverter
from fbpic.threading_utils import njit_parallel, prange
# Check if CUDA is available, then import CUDA functions
from fbpic.cuda_utils import cuda_installed
if cuda_installed:
Expand Down Expand Up @@ -318,86 +319,36 @@ def shift_spect_grid( self, grid, n_move,
"""
if grid.use_cuda:
shift = grid.d_field_shift
# Get a 2D CUDA grid of the size of the grid
tpb, bpg = cuda_tpb_bpg_2d( grid.Ep.shape[0], grid.Ep.shape[1] )
# Shift all the fields on the GPU
self.shift_spect_field_gpu( grid.Ep, shift, n_move )
self.shift_spect_field_gpu( grid.Em, shift, n_move )
self.shift_spect_field_gpu( grid.Ez, shift, n_move )
self.shift_spect_field_gpu( grid.Bp, shift, n_move )
self.shift_spect_field_gpu( grid.Bm, shift, n_move )
self.shift_spect_field_gpu( grid.Bz, shift, n_move )
shift_spect_array_gpu[tpb, bpg]( grid.Ep, shift, n_move )
shift_spect_array_gpu[tpb, bpg]( grid.Em, shift, n_move )
shift_spect_array_gpu[tpb, bpg]( grid.Ez, shift, n_move )
shift_spect_array_gpu[tpb, bpg]( grid.Bp, shift, n_move )
shift_spect_array_gpu[tpb, bpg]( grid.Bm, shift, n_move )
shift_spect_array_gpu[tpb, bpg]( grid.Bz, shift, n_move )
if shift_rho:
self.shift_spect_field_gpu( grid.rho_prev, shift, n_move )
shift_spect_array_gpu[tpb, bpg]( grid.rho_prev, shift, n_move )
if shift_currents:
self.shift_spect_field_gpu( grid.Jp, shift, n_move )
self.shift_spect_field_gpu( grid.Jm, shift, n_move )
self.shift_spect_field_gpu( grid.Jz, shift, n_move )
shift_spect_array_gpu[tpb, bpg]( grid.Jp, shift, n_move )
shift_spect_array_gpu[tpb, bpg]( grid.Jm, shift, n_move )
shift_spect_array_gpu[tpb, bpg]( grid.Jz, shift, n_move )
else:
shift = grid.field_shift
# Shift all the fields on the CPU
self.shift_spect_field( grid.Ep, shift, n_move )
self.shift_spect_field( grid.Em, shift, n_move )
self.shift_spect_field( grid.Ez, shift, n_move )
self.shift_spect_field( grid.Bp, shift, n_move )
self.shift_spect_field( grid.Bm, shift, n_move )
self.shift_spect_field( grid.Bz, shift, n_move )
shift_spect_array_cpu( grid.Ep, shift, n_move )
shift_spect_array_cpu( grid.Em, shift, n_move )
shift_spect_array_cpu( grid.Ez, shift, n_move )
shift_spect_array_cpu( grid.Bp, shift, n_move )
shift_spect_array_cpu( grid.Bm, shift, n_move )
shift_spect_array_cpu( grid.Bz, shift, n_move )
if shift_rho:
self.shift_spect_field( grid.rho_prev, shift, n_move )
shift_spect_array_cpu( grid.rho_prev, shift, n_move )
if shift_currents:
self.shift_spect_field( grid.Jp, shift, n_move )
self.shift_spect_field( grid.Jm, shift, n_move )
self.shift_spect_field( grid.Jz, shift, n_move )

def shift_spect_field( self, field_array, shift_factor, n_move ):
"""
Shift the field 'field_array' by n_move cells.
This is done in spectral space and corresponds to multiplying the
fields with the factor exp(i*kz_true*dz)**n_move .
(Typically n_move is positive, and the fields are shifted backwards)
Parameters
----------
field_array: 2darray of complexs
Contains the value of the fields, and is modified by
this function
shift_factor: 1darray of complexs
Contains the shift array, that is multiplied to the fields in
spectral space to shift them by one cell in spatial space
( exp(i*kz_true*dz) )
n_move: int
The number of cells by which the grid should be shifted
"""
# Multiply with (shift_factor*sign(n_move))**n_move
field_array *= ( shift_factor[:, np.newaxis] )**n_move

def shift_spect_field_gpu( self, field_array, shift_factor, n_move):
"""
Shift the field 'field_array' by n_move cells on the GPU.
This is done in spectral space and corresponds to multiplying the
fields with the factor exp(i*kz_true*dz)**n_move .
(Typically n_move is positive, and the fields are shifted backwards)
Parameters
----------
field_array: 2darray of complexs
Contains the value of the fields, and is modified by
this function
shift_factor: 1darray of complexs
Contains the shift array, that is multiplied to the fields in
spectral space to shift them by one cell in spatial space
( exp(i*kz_true*dz) )
n_move: int
The number of cells by which the grid should be shifted
"""
# Get a 2D CUDA grid of the size of the grid
dim_grid_2d, dim_block_2d = cuda_tpb_bpg_2d(
field_array.shape[0], field_array.shape[1] )
# Shift the field array in place
shift_spect_array_gpu[dim_grid_2d, dim_block_2d](
field_array, shift_factor, n_move)
shift_spect_array_cpu( grid.Jp, shift, n_move )
shift_spect_array_cpu( grid.Jm, shift, n_move )
shift_spect_array_cpu( grid.Jz, shift, n_move )

def shift_interp_grid( self, grid, n_move,
shift_rho=True, shift_currents=False ):
Expand Down Expand Up @@ -513,6 +464,39 @@ def shift_interp_field_gpu( self, field_array, n_move):
# Return the new shifted field array
return( field_array )

@njit_parallel
def shift_spect_array_cpu( field_array, shift_factor, n_move ):
"""
Shift the field 'field_array' by n_move cells on CPU.
This is done in spectral space and corresponds to multiplying the
fields with the factor exp(i*kz_true*dz)**n_move .
Parameters
----------
field_array: 2darray of complexs
Contains the value of the fields, and is modified by
this function
shift_factor: 1darray of complexs
Contains the shift array, that is multiplied to the fields in
spectral space to shift them by one cell in spatial space
( exp(i*kz_true*dz) )
n_move: int
The number of cells by which the grid should be shifted
"""
Nz, Nr = field_array.shape

# Loop over the 2D array (in parallel over z if threading is enabled)
for iz in prange( Nz ):
power_shift = shift_factor[iz]
# Calculate the shift factor (raising to the power n_move)
for i in range(1,n_move):
power_shift *= shift_factor[iz]
# Shift fields backwards
for ir in range( Nr ):
field_array[iz, ir] *= power_shift

if cuda_installed:

@cuda.jit('void(complex128[:,:], complex128[:,:], int32)')
Expand Down
39 changes: 19 additions & 20 deletions fbpic/fields/numba_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,18 @@
This file is part of the Fourier-Bessel Particle-In-Cell code (FB-PIC)
It defines the optimized fields methods that use numba on a CPU
"""
import numba
from scipy.constants import c, epsilon_0, mu_0
c2 = c**2
from fbpic.threading_utils import njit_parallel, prange

@numba.jit('void(complex128[:,:], complex128[:,:], \
complex128[:,:], complex128[:,:], complex128[:,:], \
float64[:,:], float64[:,:], float64[:,:], \
float64, int32, int32)')
@njit_parallel
def numba_correct_currents_standard( rho_prev, rho_next, Jp, Jm, Jz,
kz, kr, inv_k2, inv_dt, Nz, Nr ):
"""
Correct the currents in spectral space, using the standard pstad
"""
# Loop over the 2D grid
for iz in range(Nz):
# Loop over the 2D grid (parallel in z, if threading is installed)
for iz in prange(Nz):
for ir in range(Nr):

# Calculate the intermediate variable F
Expand All @@ -33,13 +30,9 @@ def numba_correct_currents_standard( rho_prev, rho_next, Jp, Jm, Jz,
Jm[iz, ir] += -0.5 * kr[iz, ir] * F
Jz[iz, ir] += -1.j * kz[iz, ir] * F

@numba.jit('void(complex128[:,:], complex128[:,:], complex128[:,:], \
complex128[:,:], complex128[:,:], complex128[:,:], \
complex128[:,:], complex128[:,:], complex128[:,:], \
complex128[:,:], complex128[:,:], \
float64[:,:], float64[:,:], float64[:,:], \
float64[:,:], float64[:,:], float64[:,:], float64[:,:], float64, \
int8, int32, int32)')
return

@njit_parallel
def numba_push_eb_standard( Ep, Em, Ez, Bp, Bm, Bz, Jp, Jm, Jz,
rho_prev, rho_next,
rho_prev_coef, rho_next_coef, j_coef,
Expand All @@ -50,8 +43,8 @@ def numba_push_eb_standard( Ep, Em, Ez, Bp, Bm, Bz, Jp, Jm, Jz,
See the documentation of SpectralGrid.push_eb_with
"""
# Loop over the 2D grid
for iz in range(Nz):
# Loop over the 2D grid (parallel in z, if threading is installed)
for iz in prange(Nz):
for ir in range(Nr):

# Save the electric fields, since it is needed for the B push
Expand Down Expand Up @@ -106,7 +99,9 @@ def numba_push_eb_standard( Ep, Em, Ez, Bp, Bm, Bz, Jp, Jm, Jz,
+ j_coef[iz, ir]*( 1.j*kr[iz, ir]*Jp[iz, ir] \
+ 1.j*kr[iz, ir]*Jm[iz, ir] )

@numba.jit
return

@njit_parallel
def numba_correct_currents_comoving( rho_prev, rho_next, Jp, Jm, Jz,
kz, kr, inv_k2,
j_corr_coef, T_eb, T_cc,
Expand All @@ -115,8 +110,8 @@ def numba_correct_currents_comoving( rho_prev, rho_next, Jp, Jm, Jz,
Correct the currents in spectral space, using the assumption
of comoving currents
"""
# Loop over the 2D grid
for iz in range(Nz):
# Loop over the 2D grid (parallel in z, if threading is installed)
for iz in prange(Nz):
for ir in range(Nr):

# Calculate the intermediate variable F
Expand All @@ -130,7 +125,9 @@ def numba_correct_currents_comoving( rho_prev, rho_next, Jp, Jm, Jz,
Jm[iz, ir] += -0.5 * kr[iz, ir] * F
Jz[iz, ir] += -1.j * kz[iz, ir] * F

@numba.jit
return

@njit_parallel
def numba_push_eb_comoving( Ep, Em, Ez, Bp, Bm, Bz, Jp, Jm, Jz,
rho_prev, rho_next,
rho_prev_coef, rho_next_coef, j_coef,
Expand Down Expand Up @@ -207,3 +204,5 @@ def numba_push_eb_comoving( Ep, Em, Ez, Bp, Bm, Bz, Jp, Jm, Jz,
+ 1.j*kr[iz, ir]*Em_old ) \
+ j_coef[iz, ir]*( 1.j*kr[iz, ir]*Jp[iz, ir] \
+ 1.j*kr[iz, ir]*Jm[iz, ir] )

return
29 changes: 16 additions & 13 deletions fbpic/fields/spectral_transform/spectral_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .hankel import DHT
from .fourier import FFT

from .threading_methods import numba_rt_to_pm, numba_pm_to_rt
# Check if CUDA is available, then import CUDA functions
from fbpic.cuda_utils import cuda_installed
if cuda_installed:
Expand Down Expand Up @@ -139,12 +140,13 @@ def spect2interp_vect( self, spect_array_p, spect_array_m,
self.spect_buffer_r, self.spect_buffer_t )
else :
# Combine them on the CPU
# (It is important to write the affectation in the following way,
# since self.spect_buffer_p and self.spect_buffer_r actually point
# to the same object, for memory economy)
self.spect_buffer_r[:,:], self.spect_buffer_t[:,:] = \
( self.spect_buffer_p + self.spect_buffer_m), \
1.j*( self.spect_buffer_p - self.spect_buffer_m)
# (self.spect_buffer_r and self.spect_buffer_t are
# passed in the following line, in order to make things
# explicit, but they actually point to the same object
# as self.spect_buffer_p, self.spect_buffer_m,
# for economy of memory)
numba_pm_to_rt( self.spect_buffer_p, self.spect_buffer_m,
self.spect_buffer_r, self.spect_buffer_t )

# Finally perform the FFT (along axis 0, which corresponds to z)
self.fft.inverse_transform( self.spect_buffer_r, interp_array_r )
Expand Down Expand Up @@ -205,13 +207,14 @@ def interp2spect_vect( self, interp_array_r, interp_array_t,
self.spect_buffer_r, self.spect_buffer_t,
self.spect_buffer_p, self.spect_buffer_m )
else :
# Combine them on the CPU
# (It is important to write the affectation in the following way,
# since self.spect_buffer_p and self.spect_buffer_r actually point
# to the same object, for memory economy.)
self.spect_buffer_p[:,:], self.spect_buffer_m[:,:] = \
0.5*( self.spect_buffer_r - 1.j*self.spect_buffer_t ), \
0.5*( self.spect_buffer_r + 1.j*self.spect_buffer_t )
# Combine them on the GPU
# (self.spect_buffer_p and self.spect_buffer_m are
# passed in the following line, in order to make things
# explicit, but they actually point to the same object
# as self.spect_buffer_r, self.spect_buffer_t,
# for economy of memory)
numba_rt_to_pm( self.spect_buffer_r, self.spect_buffer_t,
self.spect_buffer_p, self.spect_buffer_m )

# Perform the inverse DHT (along axis -1, which corresponds to r)
self.dhtp.transform( self.spect_buffer_p, spect_array_p )
Expand Down
58 changes: 58 additions & 0 deletions fbpic/fields/spectral_transform/threading_methods.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Copyright 2016, FBPIC contributors
# Authors: Remi Lehe, Manuel Kirchen
# License: 3-Clause-BSD-LBNL
"""
This file is part of the Fourier-Bessel Particle-In-Cell code (FB-PIC)
It defines a set of functions that are useful when converting the
fields from interpolation grid to the spectral grid and vice-versa
"""
from fbpic.threading_utils import prange, njit_parallel

# ----------------------------------------------------
# Functions that combine components in spectral space
# ----------------------------------------------------

@njit_parallel
def numba_rt_to_pm( buffer_r, buffer_t, buffer_p, buffer_m ) :
"""
Combine the arrays buffer_r and buffer_t to produce the
arrays buffer_p and buffer_m, according to the rules of
the Fourier-Hankel decomposition (see associated paper)
"""
Nz, Nr = buffer_r.shape

# Loop over the 2D grid (parallel in z, if threading is installed)
for iz in prange(Nz):
for ir in range(Nr):

# Use intermediate variables, as the arrays
# buffer_r and buffer_t may actually point to the same
# object as buffer_p and buffer_m, for economy of memory
value_r = buffer_r[iz, ir]
value_t = buffer_t[iz, ir]
# Combine the values
buffer_p[iz, ir] = 0.5*( value_r - 1.j*value_t )
buffer_m[iz, ir] = 0.5*( value_r + 1.j*value_t )


@njit_parallel
def numba_pm_to_rt( buffer_p, buffer_m, buffer_r, buffer_t ) :
"""
Combine the arrays buffer_p and buffer_m to produce the
arrays buffer_r and buffer_t, according to the rules of
the Fourier-Hankel decomposition (see associated paper)
"""
Nz, Nr = buffer_p.shape

# Loop over the 2D grid (parallel in z, if threading is installed)
for iz in prange(Nz):
for ir in range(Nr):

# Use intermediate variables, as the arrays
# buffer_r and buffer_t may actually point to the same
# object as buffer_p and buffer_m, for economy of memory
value_p = buffer_p[iz, ir]
value_m = buffer_m[iz, ir]
# Combine the values
buffer_r[iz, ir] = ( value_p + value_m )
buffer_t[iz, ir] = 1.j*( value_p - value_m )
4 changes: 2 additions & 2 deletions fbpic/lpa_utils/laser/antenna.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from scipy.constants import e, c, epsilon_0, physical_constants
r_e = physical_constants['classical electron radius'][0]
from .profiles import gaussian_profile
from fbpic.particles.utility_methods import weights
from fbpic.particles.numba_methods import deposit_field_numba
from fbpic.particles.utilities.utility_methods import weights
from fbpic.particles.deposition.numba_methods import deposit_field_numba

# Check if CUDA is available, then import CUDA functions
from fbpic.cuda_utils import cuda_installed
Expand Down

0 comments on commit 68d4554

Please sign in to comment.