In [3]:
import numpy as np

<img src="https://cdn.dribbble.com/users/915978/screenshots/3034118/numba_1x.jpg" alt="Drawing" style="width: 40%;"/>



- Numba is a compiler for Python array and numerical functions.
- Numba generates optimized machine code from pure Python code with a few simple annotations
- Python code is just-in-time optimized to performance similar as C, C++ and Fortran, without having to switch languages or Python interpreters.
- The code is generated on-the-fly for CPU (default) or GPU hardware.

## Python decorator

A decorator is used to modify a function or a class. A reference to a function "func" or a class "C" is passed to a decorator and the decorator returns a modified function or class. The modified functions or classes usually contain calls to the original function "func" or class "C". 

In [7]:
def timeit(function):
    def wrapper(*args, **kargs):
        import time
        t1 = time.time()
        result = function(*args, **kargs)
        t2 = time.time()
        print("execution time", t2-t1)
        return result
    return wrapper

@timeit
def f(a, b):
    return a + b

print(f(1, 2))

execution time 1.1920928955078125e-06
3


## First example

In [8]:
from numba import jit
@jit
def sum(a, b):
    return a + b

- Compilation will be deferred until the first function execution. 
- Numba will infer the argument types at call time.

In [11]:
sum(1, 2), sum(1j, 2)

(3, (2+1j))

In [12]:
x = np.random.rand(10)
y = np.random.rand(10)
sum(x, y)

array([ 1.12121172,  1.67126344,  1.63359803,  1.03903651,  1.33418   ,
        1.80767788,  1.16420468,  0.61433023,  1.05862147,  1.4855171 ])

## Performance

In [13]:
x = np.random.rand(10000000)

In [15]:
%timeit x.sum() # Numpy

4.09 ms ± 251 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [16]:
@jit
def numba_sum(x):
    res= 0
    for i in range(x.size):
        res += x[i]
    return res

In [18]:
%timeit numba_sum(x)

12.4 ms ± 352 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Numba methods

In [19]:
@jit
def jit_sum(a, b):
    return a + b

In [21]:
jit_sum.inspect_types() # jit_sum has not been compiled

In [23]:
jit_sum(1, 2) # call it once with ints
jit_sum.inspect_types()

jit_sum (int64, int64)
--------------------------------------------------------------------------------
# File: <ipython-input-19-ebda2b2f7dda>
# --- LINE 1 --- 
# label 0
#   del b
#   del a
#   del $0.3

@jit

# --- LINE 2 --- 

def jit_sum(a, b):

    # --- LINE 3 --- 
    #   a = arg(0, name=a)  :: int64
    #   b = arg(1, name=b)  :: int64
    #   $0.3 = a + b  :: int64
    #   $0.4 = cast(value=$0.3)  :: int64
    #   return $0.4

    return a + b




In [24]:
jit_sum(1., 2.) # call it once with doubles
jit_sum.inspect_types()

jit_sum (int64, int64)
--------------------------------------------------------------------------------
# File: <ipython-input-19-ebda2b2f7dda>
# --- LINE 1 --- 
# label 0
#   del b
#   del a
#   del $0.3

@jit

# --- LINE 2 --- 

def jit_sum(a, b):

    # --- LINE 3 --- 
    #   a = arg(0, name=a)  :: int64
    #   b = arg(1, name=b)  :: int64
    #   $0.3 = a + b  :: int64
    #   $0.4 = cast(value=$0.3)  :: int64
    #   return $0.4

    return a + b


jit_sum (float64, float64)
--------------------------------------------------------------------------------
# File: <ipython-input-19-ebda2b2f7dda>
# --- LINE 1 --- 
# label 0
#   del b
#   del a
#   del $0.3

@jit

# --- LINE 2 --- 

def jit_sum(a, b):

    # --- LINE 3 --- 
    #   a = arg(0, name=a)  :: float64
    #   b = arg(1, name=b)  :: float64
    #   $0.3 = a + b  :: float64
    #   $0.4 = cast(value=$0.3)  :: float64
    #   return $0.4

    return a + b




- `jit_sum.inspect_llvm()` returns a dict with llvm representation.

LLVM is a library that is used to construct, optimize and produce intermediate and/or binary machine code.

- `jit_sum.inspect_asm().items()` returns a dict with assembler information. 

In [25]:
jit_sum.py_func(1, 2) # call origin python function without numba process

3

## Types coercion

Tell Numba the function signature you are expecting.

In [27]:
@jit(['int32[:](int32[:], int32[:])','int32(int32, int32)'])
def product(a, b):
    return a*b

In [28]:
product(2, 3), product(2.2, 3.2)

(6, 6)

In [29]:
a = np.arange(10, dtype=np.int32)
b = np.arange(10, dtype=np.int32)
product(a, b)

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81], dtype=int32)

In [20]:
a = np.random.random(10) # Numpy arrays contain double by default
b = np.random.random(10)
try:
    product(a, b)
except TypeError as e:
    print("TypeError:",e)

TypeError: No matching definition for argument type(s) array(float64, 1d, C), array(float64, 1d, C)


## Numba types
```C
void,
intp, uintp,
intc, uintc,
int8, uint8, int16, uint16, int32, uint32, int64, uint64,
float32, float64,
complex64, complex128.
```
### Arrays
```C
float32[:] 
float64[:, :]
```

## Numba flags

- ** nopython ** : Compilation fails if you use pure Python objects.
- ** nogil ** : Release Global Interpreter Lock (GIL). Enable this option when you use threads.
- ** cache ** : Do not recompile the function.

# Inlining


In [21]:
import math
from numba import njit

@njit
def square(x):
    return x ** 2

@njit
def hypot(x, y):
    return math.sqrt(square(x) + square(y))

In [22]:
hypot(2., 3.)

3.605551275463989

In [23]:
for k, v in hypot.inspect_asm().items():
    print(v)

	.section	__TEXT,__text,regular,pure_instructions
	.macosx_version_min 10, 11
	.globl	__ZN8__main__10hypot$2412Edd
	.p2align	4, 0x90
__ZN8__main__10hypot$2412Edd:
	vmulsd	%xmm0, %xmm0, %xmm0
	vmulsd	%xmm1, %xmm1, %xmm1
	vaddsd	%xmm1, %xmm0, %xmm0
	vsqrtsd	%xmm0, %xmm0, %xmm0
	vmovsd	%xmm0, (%rdi)
	xorl	%eax, %eax
	retq

	.globl	__ZN7cpython8__main__10hypot$2412Edd
	.p2align	4, 0x90
__ZN7cpython8__main__10hypot$2412Edd:
	.cfi_startproc
	pushq	%r15
Lcfi0:
	.cfi_def_cfa_offset 16
	pushq	%r14
Lcfi1:
	.cfi_def_cfa_offset 24
	pushq	%r13
Lcfi2:
	.cfi_def_cfa_offset 32
	pushq	%r12
Lcfi3:
	.cfi_def_cfa_offset 40
	pushq	%rbx
Lcfi4:
	.cfi_def_cfa_offset 48
	subq	$32, %rsp
Lcfi5:
	.cfi_def_cfa_offset 80
Lcfi6:
	.cfi_offset %rbx, -48
Lcfi7:
	.cfi_offset %r12, -40
Lcfi8:
	.cfi_offset %r13, -32
Lcfi9:
	.cfi_offset %r14, -24
Lcfi10:
	.cfi_offset %r15, -16
	movq	%rdi, %rbx
	movabsq	$_.const.hypot, %r10
	movabsq	$_PyArg_UnpackTuple, %r11
	leaq	24(%rsp), %r8
	leaq	16(%rsp), %r9
	movl	$2, %edx
	movl	$2, %

In [24]:
import pyculib.fft
import numba.cuda
import numpy as np

@numba.cuda.jit
def apply_mask(frame, mask):
    i, j = numba.cuda.grid(2)
    frame[i, j] *= mask[i, j]

x = np.linspace(0, 4*np.pi, 720) 
y = np.linspace(0, 4*np.pi, 1280) 

X, Y = np.meshgrid(x,y)
frame = np.cos(X)*np.sin(Y)
mask = frame / (np.max(frame)-np.min(frame))

out = np.empty_like(mask, dtype=np.complex64)
gpu_temp = numba.cuda.to_device(out)  # make GPU array
gpu_mask = numba.cuda.to_device(mask)  # make GPU array

pyculib.fft.fft(frame.astype(np.complex64), gpu_temp)  # implied host->device
apply_mask[blocks, tpb](gpu_temp, gpu_mask)  # all on device
pyculib.fft.ifft(gpu_temp, out)  # implied device->host

Exception: Cannot open library for cusparse:
library cusparse not found

In [27]:

import numpy as np
from numba import vectorize

@vectorize(['float32(float32, float32)'], target='cuda')
def Add(a, b):
  return a + b

# Initialize arrays
N = 100000
A = np.ones(N, dtype=np.float32)
B = np.ones(A.shape, dtype=A.dtype)
C = np.empty_like(A, dtype=A.dtype)



In [28]:
%%time
# Add arrays on GPU
C = Add(A, B)

CudaSupportError: Error at driver init: 

CUDA driver library cannot be found.
If you are sure that a CUDA driver is installed,
try setting environment variable NUMBAPRO_CUDA_DRIVER
with the file path of the CUDA driver shared library.
:

In [29]:
%%time
C = A+ B

CPU times: user 405 µs, sys: 463 µs, total: 868 µs
Wall time: 356 µs


In [30]:
import numpy as np
from pyculib import rand as curand

prng = curand.PRNG(rndtype=curand.PRNG.XORWOW)
rand = np.empty(100000)
prng.uniform(rand)
print(rand[:10])

Exception: Cannot open library for cusparse:
library cusparse not found

# References

* [Numba by Loic Gouarin](https://github.com/gouarin/cours_numba_2017)
* [Numba Documentation](http://numba.pydata.org/numba-doc/latest/index.html)
* [Numbapro](https://github.com/ContinuumIO/numbapro-examples/)
* [Numba examples](https://github.com/harrism/numba_examples)
