In [1]:
from numba import jit, prange, float64
import numpy as np

In [8]:
@jit(nopython=True, parallel=True, fastmath=True, cache=True)
def sum_field_particle(x: np.array, y: np.array, z: np.array,
                       Fx: np.array, Fy: np.array, Fz: np.array,
                       z_start: float=0.0) -> (np.array, np.array, np.array):
    ''' Sum particles fields

    '''
    n = int(len(x))
    r3 = np.zeros(n)
    for k in prange(int(n)):
        for i in prange(int(n)):
            if z[i] >= z_start and i!=k:
                r3[k] = np.sqrt(((x[k] - x[i])*(x[k] - x[i]) + (y[k] - y[i])*(y[k] - y[i]) + (z[k] - z[i])*(z[k] - z[i]))*
                              ((x[k] - x[i])*(x[k] - x[i]) + (y[k] - y[i])*(y[k] - y[i]) + (z[k] - z[i])*(z[k] - z[i]))*
                              ((x[k] - x[i])*(x[k] - x[i]) + (y[k] - y[i])*(y[k] - y[i]) + (z[k] - z[i])*(z[k] - z[i])))
                Fx[k] = Fx[k] + (x[k] - x[i])/r3[k]
                Fy[k] = Fy[k] + (y[k] - y[i])/r3[k]
                Fz[k] = Fz[k] + (z[k] - z[i])/r3[k]
    return Fx, Fy, Fz

In [9]:
n = 1_000
x = np.random.uniform(0, 1, n)
y = np.random.uniform(0, 1, n)
z = np.random.uniform(0, 1, n)
Fx, Fy, Fz = np.zeros(n), np.zeros(n), np.zeros(n)

In [10]:
%%timeit -n1000
sum_field_particle(x, y, z, Fx, Fy, Fz)

2.8 ms ± 339 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [11]:
@numba.jit(nopython=True, parallel=True, fastmath=True, cache=True)
def sum_field_particle(x: np.array, y: np.array, z: np.array,
                       z_start: float=0.0) -> (np.array, np.array, np.array):
    ''' Sum particles fields

    '''
    n = int(len(x))
    Fx, Fy, Fz = np.zeros(n), np.zeros(n), np.zeros(n)
    r3 = np.zeros(n)
    for i in prange(int(n)):
        if z[i] >= z_start:
            r3 = np.sqrt(((x - x[i])*(x - x[i]) + (y - y[i])*(y - y[i]) + (z - z[i])*(z - z[i]))*
                          ((x - x[i])*(x - x[i]) + (y - y[i])*(y - y[i]) + (z - z[i])*(z - z[i]))*
                          ((x - x[i])*(x - x[i]) + (y - y[i])*(y - y[i]) + (z - z[i])*(z - z[i])))
            r3[i] = float(np.inf)
            Fx = Fx + (x - x[i])/r3
            Fy = Fy + (y - y[i])/r3
            Fz = Fz + (z - z[i])/r3
    return Fx, Fy, Fz

NameError: name 'numba' is not defined

In [81]:
%%timeit -n100
sum_field_particle(x, y, z)

[1m
File "<ipython-input-80-3a5126133093>", line 10:[0m
[1mdef sum_field_particle(x: np.array, y: np.array, z: np.array,
    <source elided>
    r3 = np.zeros(n)
[1m    for i in prange(int(n)):
[0m    [1m^[0m[0m
[0m
[1m
File "<ipython-input-80-3a5126133093>", line 10:[0m
[1mdef sum_field_particle(x: np.array, y: np.array, z: np.array,
    <source elided>
    r3 = np.zeros(n)
[1m    for i in prange(int(n)):
[0m    [1m^[0m[0m
[0m
[1m
File "<ipython-input-80-3a5126133093>", line 10:[0m
[1mdef sum_field_particle(x: np.array, y: np.array, z: np.array,
    <source elided>
    r3 = np.zeros(n)
[1m    for i in prange(int(n)):
[0m    [1m^[0m[0m
[0m


LoweringError: Failed in nopython mode pipeline (step: nopython mode backend)
[1m[1mBuffer dtype cannot be buffer, have dtype: array(float64, 1d, C)
[1m
File "<ipython-input-80-3a5126133093>", line 10:[0m
[1mdef sum_field_particle(x: np.array, y: np.array, z: np.array,
    <source elided>
    r3 = np.zeros(n)
[1m    for i in prange(int(n)):
[0m    [1m^[0m[0m
[0m
[0m[1m[1] During: lowering "id=91[LoopNest(index_variable = parfor_index.5569, range = (0, $60.4, 1))]{90: <ir.Block at <ipython-input-80-3a5126133093> (11)>, 388: <ir.Block at <ipython-input-80-3a5126133093> (10)>, 76: <ir.Block at <ipython-input-80-3a5126133093> (10)>}Var(parfor_index.5569, <ipython-input-80-3a5126133093> (10))" at <ipython-input-80-3a5126133093> (10)[0m
-------------------------------------------------------------------------------
This should not have happened, a problem has occurred in Numba's internals.
You are currently using Numba version 0.45.1.

Please report the error message and traceback, along with a minimal reproducer
at: https://github.com/numba/numba/issues/new

If more help is needed please feel free to speak to the Numba core developers
directly at: https://gitter.im/numba/numba

Thanks in advance for your help in improving Numba!



In [20]:
%load_ext Cython

In [23]:
%%cython

def sum_field_particle(x, y, z,
                       z_start: float=0.0):
    ''' Sum particles fields

    '''
    n = int(len(x))
    Fx, Fy, Fz = np.zeros(n), np.zeros(n), np.zeros(n)
    r3 = np.zeros(n)
    for i in np.arange(int(n)):
        if z[i] >= z_start:
            r3 = np.sqrt(((x - x[i])*(x - x[i]) + (y - y[i])*(y - y[i]) + (z - z[i])*(z - z[i]))*
                          ((x - x[i])*(x - x[i]) + (y - y[i])*(y - y[i]) + (z - z[i])*(z - z[i]))*
                          ((x - x[i])*(x - x[i]) + (y - y[i])*(y - y[i]) + (z - z[i])*(z - z[i])))
            r3[i] = float(np.inf)
            Fx = Fx + (x - x[i])/r3
            Fy = Fy + (y - y[i])/r3
            Fz = Fz + (z - z[i])/r3
    return Fx, Fy, Fz


Error compiling Cython file:
------------------------------------------------------------
...
                       z_start: float=0.0):
    ''' Sum particles fields

    '''
    n = int(len(x))
    Fx, Fy, Fz = np.zeros(n), np.zeros(n), np.zeros(n)
                ^
------------------------------------------------------------

/Users/fuodorov/.ipython/cython/_cython_magic_97e379738c3aa23cbc0cd49879b5d161.pyx:8:17: undeclared name not builtin: np


TypeError: object of type 'NoneType' has no len()

In [94]:
from numba import jit, prange
import numpy as np

# Numpy array of 10k elements
input_ndarray = np.random.rand(10000).reshape(10000)

@jit(nopython=True, parallel=True, fastmath=True)
def go_super_fast(a):
    trace = 0
    for i in prange(a.shape[0]):
        trace += np.tanh(a[i])
    return a + trace

%timeit go_super_fast(input_ndarray)

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.
[1m
File "<ipython-input-94-a6afae01596b>", line 8:[0m
[1m@jit(nopython=True, parallel=True, fastmath=True)
[1mdef go_super_fast(a):
[0m[1m^[0m[0m
[0m
  self.func_ir.loc))


89.6 µs ± 3.29 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [96]:
go_super_fast.parallel_diagnostics(level=4)

 
 Parallel Accelerator Optimizing:  Function go_super_fast, <ipython-
input-94-a6afae01596b> (7)  


Parallel loop listing for  Function go_super_fast, <ipython-input-94-a6afae01596b> (7) 
-----------------------------------------------------|loop #ID
@jit(nopython=True, parallel=True, fastmath=True)    | 
def go_super_fast(a):                                | 
    trace = 0                                        | 
    for i in prange(a.shape[0]):---------------------| #43
        trace += np.tanh(a[i])                       | 
    return a + trace---------------------------------| #42
Performing sequential lowering of loops...
--------------------------------------------------------------------------------
  Trying to fuse loops #43 and #42:
    - fusion failed: parallel loop 43 has a dependency on the body of parallel 
loop 42. 
  Trying to fuse loops #43 and #42:
    - fusion failed: parallel loop 43 has a dependency on the body of parallel 
loop 42. 
-----------------------------

In [19]:
from numba import njit, prange
@njit(parallel=True, fastmath=True)
def do_sum_parallel_fast(A):
    n = len(A)
    acc = 0.
    for i in prange(n):
        acc += np.sqrt(A[i])
    return acc

In [21]:
%timeit do_sum_parallel_fast(x)

462 µs ± 16.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
