In [1]:
import numpy as np

import numba
from numba import njit

In [2]:
x = np.arange(100).reshape(10, 10)

In [67]:
@njit
def go_fast(a, parallel=True):
    trace = 0
    for i in range(a.shape[0]):
        trace += np.tanh(a[i, i])

    return a + trace

In [68]:
%%time
go_fast(x)

CPU times: user 169 ms, sys: 4.85 ms, total: 174 ms
Wall time: 175 ms


array([[  9.,  10.,  11.,  12.,  13.,  14.,  15.,  16.,  17.,  18.],
       [ 19.,  20.,  21.,  22.,  23.,  24.,  25.,  26.,  27.,  28.],
       [ 29.,  30.,  31.,  32.,  33.,  34.,  35.,  36.,  37.,  38.],
       [ 39.,  40.,  41.,  42.,  43.,  44.,  45.,  46.,  47.,  48.],
       [ 49.,  50.,  51.,  52.,  53.,  54.,  55.,  56.,  57.,  58.],
       [ 59.,  60.,  61.,  62.,  63.,  64.,  65.,  66.,  67.,  68.],
       [ 69.,  70.,  71.,  72.,  73.,  74.,  75.,  76.,  77.,  78.],
       [ 79.,  80.,  81.,  82.,  83.,  84.,  85.,  86.,  87.,  88.],
       [ 89.,  90.,  91.,  92.,  93.,  94.,  95.,  96.,  97.,  98.],
       [ 99., 100., 101., 102., 103., 104., 105., 106., 107., 108.]])

In [5]:
%%time
go_fast(x)

CPU times: user 15 µs, sys: 0 ns, total: 15 µs
Wall time: 17.2 µs


array([[  9.,  10.,  11.,  12.,  13.,  14.,  15.,  16.,  17.,  18.],
       [ 19.,  20.,  21.,  22.,  23.,  24.,  25.,  26.,  27.,  28.],
       [ 29.,  30.,  31.,  32.,  33.,  34.,  35.,  36.,  37.,  38.],
       [ 39.,  40.,  41.,  42.,  43.,  44.,  45.,  46.,  47.,  48.],
       [ 49.,  50.,  51.,  52.,  53.,  54.,  55.,  56.,  57.,  58.],
       [ 59.,  60.,  61.,  62.,  63.,  64.,  65.,  66.,  67.,  68.],
       [ 69.,  70.,  71.,  72.,  73.,  74.,  75.,  76.,  77.,  78.],
       [ 79.,  80.,  81.,  82.,  83.,  84.,  85.,  86.,  87.,  88.],
       [ 89.,  90.,  91.,  92.,  93.,  94.,  95.,  96.,  97.,  98.],
       [ 99., 100., 101., 102., 103., 104., 105., 106., 107., 108.]])

In [6]:
numba.sys.hash_info

sys.hash_info(width=64, modulus=2305843009213693951, inf=314159, nan=0, imag=1000003, algorithm='siphash24', hash_bits=64, seed_bits=128, cutoff=0)

In [9]:
@njit(parallel=True)
def _generate_KMU(KMT):
    """Computes KMU from KMT."""
    KMU = np.zeros_like(KMT)
    for i in prange(KMT.shape[0]):
        for j in prange(KMT.shape[1]):
            KMU[i, j] = min(KMT[i, j], KMT[i - 1, j], KMT[i, j - 1], KMT[i - 1, j - 1])
    return KMU

In [10]:
KMT = np.arange(100).reshape(10, 10)
KMU = np.zeros_like(KMT)

In [11]:
@numba.njit(parallel=True)
def generate_KMU_v1(KMT, KMU):
    for i in numba.prange(KMT.shape[0]):
        for j in numba.prange(KMT.shape[1]):
            KMU[i, j] = min(KMT[i, j], KMT[i - 1, j], KMT[i, j - 1], KMT[i - 1, j - 1])

In [84]:
generate_KMU_v1(KMT, KMU)
generate_KMU_v1.parallel_diagnostics(level=4)

 
 Parallel Accelerator Optimizing:  Function generate_KMU_v1, <ipython-
input-11-afddc4168499> (1)  


Parallel loop listing for  Function generate_KMU_v1, <ipython-input-11-afddc4168499> (1) 
-------------------------------------------------------------------------------------------|loop #ID
@numba.njit(parallel=True)                                                                 | 
def generate_KMU_v1(KMT, KMU):                                                             | 
    for i in numba.prange(KMT.shape[0]):---------------------------------------------------| #1
        for j in numba.prange(KMT.shape[1]):-----------------------------------------------| #0
            KMU[i, j] = min(KMT[i, j], KMT[i - 1, j], KMT[i, j - 1], KMT[i - 1, j - 1])    | 
--------------------------------- Fusing loops ---------------------------------
Attempting fusion of parallel loops (combines loops with similar properties)...
----------------------------- Before Optimisation --------------------

In [85]:
@numba.njit(parallel=True)
def generate_KMU(KMT):
    KMU = np.zeros_like(KMT)
    for i in numba.prange(KMT.shape[0]):
        for j in numba.prange(KMT.shape[1]):
            KMU[i, j] = min(KMT[i, j], KMT[i - 1, j], KMT[i, j - 1], KMT[i - 1, j - 1])

    return KMU

In [86]:
generate_KMU(KMT)
generate_KMU.parallel_diagnostics(level=4)

 
 Parallel Accelerator Optimizing:  Function generate_KMU, <ipython-
input-85-435a1b491be1> (1)  


Parallel loop listing for  Function generate_KMU, <ipython-input-85-435a1b491be1> (1) 
-------------------------------------------------------------------------------------------|loop #ID
@numba.njit(parallel=True)                                                                 | 
def generate_KMU(KMT):                                                                     | 
    KMU = np.zeros_like(KMT)                                                               | 
    for i in numba.prange(KMT.shape[0]):---------------------------------------------------| #11
        for j in numba.prange(KMT.shape[1]):-----------------------------------------------| #10
            KMU[i, j] = min(KMT[i, j], KMT[i - 1, j], KMT[i, j - 1], KMT[i - 1, j - 1])    | 
                                                                                           | 
    return KMU                                 

In [14]:
%%timeit
generate_KMU(KMT)

The slowest run took 201.54 times longer than the fastest. This could mean that an intermediate result is being cached.
56.3 µs ± 130 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
@numba.generated_jit(nopython=True)
def is_missing(x):
    if isinstance(x, numba.types.Float):
        return lambda x: np.isnan(x)
    elif isinstance(x, (numba.types.NPDatetime, numba.types.NPTimedelta)):
        missing = x('NaT')
        return lambda x: x == missing

    else:
        lambda x: False

In [28]:
@numba.vectorize(
    [
        numba.int32(numba.int32, numba.int32),
        numba.int64(numba.int64, numba.int64),
        numba.float32(numba.float32, numba.float32),
        numba.float64(numba.float64, numba.float64),
    ]
)
def f(x, y):
    return x + y

In [29]:
f(2, 4)

6

In [30]:
%%timeit
f(2, 4)

898 ns ± 10.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [31]:
f(4, 4)

8

In [32]:
a = np.arange(12).reshape(3, 4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [33]:
f.reduce(a, axis=1)

array([ 6, 22, 38])

In [36]:
f.accumulate(a, axis=1)

array([[ 0,  1,  3,  6],
       [ 4,  9, 15, 22],
       [ 8, 17, 27, 38]])

In [37]:
from numba import float32, float64, int32, int64

In [38]:
@numba.guvectorize([(int64[:], int64, int64[:])], '(n),()->(n)')
def g(x, y, res):
    for i in range(x.shape[0]):
        res[i] = x[i] + y

In [42]:
a, a.shape

(array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]]),
 (3, 4))

In [40]:
g(a, 100)

array([[100, 101, 102, 103],
       [104, 105, 106, 107],
       [108, 109, 110, 111]])

In [43]:
g(a, np.array([10, 20, 30]))

array([[10, 11, 12, 13],
       [24, 25, 26, 27],
       [38, 39, 40, 41]])

In [45]:
g

<ufunc 'g'>

In [46]:
f

<numba._DUFunc 'f'>

In [47]:
f.ufunc.types

['ii->i', 'll->l', 'ff->f', 'dd->d']

In [48]:
g.types

['ll->l']

## @jitclass

In [49]:
spec = [('value', int32), ('array', float32[:])]

In [54]:
@numba.jitclass(spec)
class Bag:
    def __init__(self, value):
        self.value = value
        self.array = np.zeros(value, dtype=np.float32)

    @property
    def size(self):
        return self.array.size

    def increment(self, val):
        for i in range(self.size):
            self.array[i] = val

        return self.array

In [55]:
bag = Bag(5)

In [56]:
bag

<numba.jitclass.boxing.Bag at 0x11ff10810>

In [57]:
%%timeit
bag.increment(10)

987 ns ± 24.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [61]:
min(1, 2, 4, 5)

1

In [82]:
@njit(parallel=True)
def test(x):
    n = x.shape[0]
    a = np.sin(x)
    b = np.cos(a * a)
    acc = 0
    for i in numba.prange(n - 2):
        for j in numba.prange(n - 1):
            acc += b[i] + b[j + 1]
    return acc


test(np.arange(10))

test.parallel_diagnostics(level=2)

 
 Parallel Accelerator Optimizing:  Function test, <ipython-
input-82-96da674543a7> (1)  


Parallel loop listing for  Function test, <ipython-input-82-96da674543a7> (1) 
-----------------------------------------|loop #ID
@njit(parallel=True)                     | 
def test(x):                             | 
    n = x.shape[0]                       | 
    a = np.sin(x)------------------------| #6
    b = np.cos(a * a)--------------------| #7
    acc = 0                              | 
    for i in numba.prange(n - 2):--------| #9
        for j in numba.prange(n - 1):----| #8
            acc += b[i] + b[j + 1]       | 
    return acc                           | 
----------------------------- Before Optimisation ------------------------------
Parallel region 0:
+--6 (parallel)
+--7 (parallel)


Parallel region 1:
+--9 (parallel)
   +--8 (parallel)


--------------------------------------------------------------------------------
------------------------------ After Optimisation --------