Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of CPU multi-threading #80

Merged
merged 41 commits into from
Jul 22, 2017
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
9ae48f3
Initial CPU multi-threading implementation
MKirchen Jul 14, 2017
497c55a
Initial CPU multi-threading implementation (part 2)
MKirchen Jul 14, 2017
8f2e3ff
Fix pyflakes errors
MKirchen Jul 14, 2017
380b326
Print number of threads along with number of MPI procs
RemiLehe Jul 15, 2017
af15196
Swapped the order of global arrays + removed thread-local arrays
RemiLehe Jul 16, 2017
1e252be
Fix pyflakes errors
RemiLehe Jul 16, 2017
f8536d7
Merge pull request #82 from RemiLehe/better_thread_scaling
MKirchen Jul 16, 2017
e539347
Merge branch 'dev' into cpuprange
RemiLehe Jul 16, 2017
f9ce679
Corrected import pattern in laser antenna
RemiLehe Jul 16, 2017
eedf019
Fix thread index calculation
RemiLehe Jul 16, 2017
3a5f0f0
Fix automated tests
RemiLehe Jul 17, 2017
a3c2248
Implement parallel reduce
RemiLehe Jul 17, 2017
da28bbe
Added docstring to the function
RemiLehe Jul 17, 2017
1d9c4a7
Merge pull request #83 from RemiLehe/parallel_reduction
MKirchen Jul 17, 2017
f70cca1
Added cubic deposition functions
MKirchen Jul 17, 2017
2053e1f
Adapted particles.py for cubic prange deposition
MKirchen Jul 17, 2017
392354d
Removed linear_non_atomic shape from uniform_rho test
MKirchen Jul 17, 2017
7055845
Corrected some bugs introduced in last commits
MKirchen Jul 17, 2017
0a22108
Fix cubic deposition and cubic gathering
MKirchen Jul 17, 2017
c2f75b3
Remove function signature in field methods
RemiLehe Jul 19, 2017
18486c1
Create threading_utils.py
RemiLehe Jul 19, 2017
fce6692
Check if threading is installed in main.py
RemiLehe Jul 19, 2017
b6c3584
Added threaded methods for the fields
RemiLehe Jul 19, 2017
0c52a62
Added parallel capability for grid methods
RemiLehe Jul 19, 2017
8458a0d
Removed threaded push methods
RemiLehe Jul 19, 2017
6317aa3
Corrected push_x's return
RemiLehe Jul 19, 2017
c817b44
Correct push_p and push_x with return function
RemiLehe Jul 19, 2017
f1f2ab2
Give the right threading flag to particles
RemiLehe Jul 19, 2017
54bd6f0
Threaded the routines that convert from p/m to r/t components
RemiLehe Jul 19, 2017
5d2c7e5
Remove the flag `use_threading` as an input argument
RemiLehe Jul 20, 2017
62e37c5
Correct pyflakes errors
RemiLehe Jul 20, 2017
076b668
Thread the shifting of the grid in spectral space
RemiLehe Jul 20, 2017
0e654a1
Fix the threaded shift function
RemiLehe Jul 20, 2017
11ce49d
Remove arguments nthreads
RemiLehe Jul 20, 2017
ffe436c
Merge pull request #89 from RemiLehe/threaded_grids
MKirchen Jul 20, 2017
93149c0
Merge pull request #91 from RemiLehe/thread_shift_window
MKirchen Jul 20, 2017
77be6cf
Modified import structure of the prange function
RemiLehe Jul 20, 2017
7772819
Replace line endings to unix style
RemiLehe Jul 22, 2017
afd5d3b
Replace tx_chunks by an array
RemiLehe Jul 22, 2017
d35a7c1
Changes in variable names and docstring
RemiLehe Jul 22, 2017
6204585
Removed all mentions of linear_non_atomic
RemiLehe Jul 22, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions fbpic/lpa_utils/laser/antenna.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from scipy.constants import e, c, epsilon_0, physical_constants
r_e = physical_constants['classical electron radius'][0]
from .profiles import gaussian_profile
from fbpic.particles.utility_methods import weights
from fbpic.particles.numba_methods import deposit_field_numba
from fbpic.particles.utilities.utility_methods import weights
from fbpic.particles.deposition.numba_methods import deposit_field_numba

# Check if CUDA is available, then import CUDA functions
from fbpic.cuda_utils import cuda_installed
Expand Down
64 changes: 44 additions & 20 deletions fbpic/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# (This needs to be done before the other imports,
# as it sets the cuda context)
from mpi4py import MPI
import numba
# Check if CUDA is available, then import CUDA functions
from .cuda_utils import cuda_installed
if cuda_installed:
Expand Down Expand Up @@ -41,12 +42,11 @@ class Simulation(object):
def __init__(self, Nz, zmax, Nr, rmax, Nm, dt, p_zmin, p_zmax,
p_rmin, p_rmax, p_nz, p_nr, p_nt, n_e, zmin=0.,
n_order=-1, dens_func=None, filter_currents=True,
v_comoving=None, use_galilean=False,
initialize_ions=False, use_cuda=False,
n_guard=None, n_damp=30,
exchange_period=None, boundaries='periodic',
gamma_boost=None, use_all_mpi_ranks=True,
particle_shape='linear' ):
v_comoving=None, use_galilean=False, initialize_ions=False,
use_cuda=False, use_threading=True, nthreads=None,
n_guard=None, n_damp=30, exchange_period=None,
boundaries='periodic', gamma_boost=None,
use_all_mpi_ranks=True, particle_shape='linear' ):
"""
Initializes a simulation, by creating the following structures:

Expand Down Expand Up @@ -132,6 +132,12 @@ def dens_func( z, r ) ...

use_cuda: bool, optional
Whether to use CUDA (GPU) acceleration
use_threading : bool, optional
Whether to use multi-threading on the CPU.
nthreads: int, optional
Number of CPU threads to use (only relevant if use_threading
is set). If nthreads is None, the number of threads
is determined automatically from the numba preset.

n_guard: int, optional
Number of guard cells to use at the left and right of
Expand Down Expand Up @@ -186,13 +192,23 @@ def dens_func( z, r ) ...
to first order shapes, 'linear_non_atomic' uses an equivalent
deposition scheme to 'linear' which avoids atomics on the GPU.
"""
# Check whether to use cuda
# Check whether to use CUDA
self.use_cuda = use_cuda
if (use_cuda==True) and (cuda_installed==False):
print('*** Cuda not available for the simulation.')
print('*** Performing the simulation on CPU.')
self.use_cuda = False

# CPU multi-threading
self.use_threading = use_threading
if self.use_threading:
# Define number of threads used
if nthreads is not None:
# Set user-defined number of threads
self.nthreads = nthreads
numba.config.NUMBA_NUM_THREADS = self.nthreads
else:
# Automatically take numba preset for number of threads
self.nthreads = numba.config.NUMBA_NUM_THREADS
# Register the comoving parameters
self.v_comoving = v_comoving
self.use_galilean = use_galilean
Expand All @@ -212,7 +228,7 @@ def dens_func( z, r ) ...
self.comm = BoundaryCommunicator( Nz, zmin, zmax, Nr, rmax, Nm, dt,
boundaries, n_order, n_guard, n_damp, exchange_period,
use_all_mpi_ranks )
print_simulation_setup( self.comm, self.use_cuda )
print_simulation_setup( self.comm, self.use_cuda, self.use_threading )
# Modify domain region
zmin, zmax, p_zmin, p_zmax, Nz = \
self.comm.divide_into_domain(zmin, zmax, p_zmin, p_zmax)
Expand All @@ -234,19 +250,20 @@ def dens_func( z, r ) ...
# Initialize the electrons and the ions
grid_shape = self.fld.interp[0].Ez.shape
self.ptcl = [
Particles( q=-e, m=m_e, n=n_e, Npz=Npz, zmin=p_zmin,
zmax=p_zmax, Npr=Npr, rmin=p_rmin, rmax=p_rmax,
Nptheta=p_nt, dt=dt, dens_func=dens_func,
use_cuda=self.use_cuda, uz_m=uz_m,
grid_shape=grid_shape, particle_shape=particle_shape) ]
Particles(q=-e, m=m_e, n=n_e, Npz=Npz, zmin=p_zmin,
zmax=p_zmax, Npr=Npr, rmin=p_rmin, rmax=p_rmax,
Nptheta=p_nt, dt=dt, dens_func=dens_func, uz_m=uz_m,
grid_shape=grid_shape, particle_shape=particle_shape,
use_cuda=self.use_cuda,
use_threading=self.use_threading) ]
if initialize_ions :
self.ptcl.append(
Particles(q=e, m=m_p, n=n_e, Npz=Npz, zmin=p_zmin,
zmax=p_zmax, Npr=Npr, rmin=p_rmin, rmax=p_rmax,
Nptheta=p_nt, dt=dt, dens_func=dens_func,
use_cuda=self.use_cuda, uz_m=uz_m,
grid_shape=grid_shape,
particle_shape=particle_shape ) )
Nptheta=p_nt, dt=dt, dens_func=dens_func, uz_m=uz_m,
grid_shape=grid_shape, particle_shape=particle_shape,
use_cuda=self.use_cuda,
use_threading=self.use_threading) )

# Register the number of particles per cell along z, and dt
# (Necessary for the moving window)
Expand Down Expand Up @@ -578,7 +595,7 @@ def progression_bar( i, Ntot, measured_start, Nbars=50, char='-'):
sys.stdout.write(', %d:%02d:%02d left' % (h, m, s))
sys.stdout.flush()

def print_simulation_setup( comm, use_cuda ):
def print_simulation_setup( comm, use_cuda, use_threading ):
"""
Print message about the number of proc (and, on CPU, the number
of threads per proc) and whether it is using GPU or CPU.
Expand All @@ -590,13 +607,20 @@ def print_simulation_setup( comm, use_cuda ):

use_cuda: bool
Whether the simulation is set up to use CUDA

use_threading: bool
Whether the simulation is set up to use threads on CPU
"""
if comm.rank == 0:
if use_cuda:
message = "\nRunning FBPIC on GPU "
else:
message = "\nRunning FBPIC on CPU "
message += "with %d proc.\n" %comm.size
message += "with %d proc" %comm.size
if use_threading and not use_cuda:
message += " (%d threads per proc)" %numba.config.NUMBA_NUM_THREADS
message += ".\n"

print( message )

def adapt_to_grid( x, p_xmin, p_xmax, p_nx, ncells_empty=0 ):
Expand Down