# Numba and NumPy on the CPU

https://github.com/numba/pydata-amsterdam2019-numba

In [1]:
import numpy as np

main attributes of a numpy array:
- data: a pointer to a data buffer with the actual array values
- dtype: the data type of each array element (ex: float32, int64, complex128)
- shape: size of each array dimension (ex: 1D = (4, ); 2D = (6, 8); 3D = (2, 4, 6)
- strides: the number of bytes that separate elemetns as you move along each dimension

In [4]:
x = np.zeros(shape=(2,), dtype="int32")
print(x.data)
print(x.dtype)
print(x.shape)
print(x.strides)

<memory at 0x7f48c0d4e588>
int32
(2,)
(4,)


In [5]:
x = np.zeros(shape=(10,), dtype="int32")
print(x.data)
print(x.dtype)
print(x.shape)
print(x.strides)

<memory at 0x7f48c0d4e588>
int32
(10,)
(4,)


In [3]:
x = np.zeros(shape=(2,3), dtype="int32")
print(x.data)
print(x.dtype)
print(x.shape)
print(x.strides)

<memory at 0x7f48c0e92708>
int32
(2, 3)
(12, 4)


In [6]:
x = np.zeros(shape=(10,), dtype="int64")
print(x.data)
print(x.dtype)
print(x.shape)
print(x.strides)

<memory at 0x7f48c0d4e588>
int64
(10,)
(8,)


In [8]:
x = np.zeros(shape=(10,), dtype="complex128")
print(x.data)
print(x.dtype)
print(x.shape)
print(x.strides)

<memory at 0x7f48c0d4e588>
complex128
(10,)
(16,)


## Universal function

- ufunc --- a function that operates on each element in an array, or combine single elements form several input arrays
- process by which array elements are matched up is called broadcasting

In [1]:
import numpy as np

In [2]:
a = np.array([1,2,3,4])
b = np.array([10, 20, 30, 40])

np.add(a, b)

array([11, 22, 33, 44])

In [3]:
### ufunc, broadcasting
np.add(a, 100)

array([101, 102, 103, 104])

In [5]:
### lower dimension -> replicated along additional axes
c = np.arange(4*4).reshape(4,4)
np.add(c, a)

array([[ 1,  3,  5,  7],
       [ 5,  7,  9, 11],
       [ 9, 11, 13, 15],
       [13, 15, 17, 19]])

In [6]:
print(c)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


In [7]:
### if want to add each column instead of each row
b_col = b[:, np.newaxis]
b_col

array([[10],
       [20],
       [30],
       [40]])

In [8]:
np.add(b_col, c)

array([[10, 11, 12, 13],
       [24, 25, 26, 27],
       [38, 39, 40, 41],
       [52, 53, 54, 55]])

## 3. A large library of array functions

- compiled c code
- LA, math, cumulative functions, logical operators, random number generator

### Limitation

- no every function can be expressed in linear algebra, array
- example: Conways Game of Life
- there is siguation that you really need for loop
    - numpy haikus => hard to read, clever but no readability
- no parallelization

```
def life_step(X)
```

## What is Numba

- function compiler
- type specializing (duck typing)
- just in time
- numerically-focused
    - many string use cases are not going to work well on the GPU.

### Requirements

hardware
- CPU: intel/AMD
- GPU: NVIDA/AMD (ROCm on Linux)
- ARM: raspberry pi and Jetson TX2

In [9]:
import numpy as np
from numba import jit
import math

In [10]:
@jit
def hypot(x, y):
    x = abs(x)
    y = abs(y)
    t = min(x, y)
    x = max(x, y)
    t = t / x
    return x * math.sqrt(1 + t * t)

In [11]:
### original python implementation
hypot.py_func(3, 4)

5.0

In [12]:
### compiled by numba
hypot(3, 4)

5.0

In [25]:
hypot(5, 12)

13.0

In [26]:
hypot(1, 1)

1.4142135623730951

In [28]:
hypot(1, 3**0.5)

2.0

### Benchmarking

In [13]:
%timeit hypot(3, 4)

252 ns ± 2.31 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [15]:
%timeit hypot.py_func(3, 4)

711 ns ± 3.96 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [17]:
%timeit math.hypot(3, 4)

133 ns ± 0.732 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


Numba does introduce some overhead to each function call that is larger than the built-in function in Python

type inference: `.inspect_types()` method

In [18]:
hypot.inspect_types()

hypot (int64, int64)
--------------------------------------------------------------------------------
# File: <ipython-input-10-8b464490a834>
# --- LINE 1 --- 
# label 0

@jit

# --- LINE 2 --- 

def hypot(x, y):

    # --- LINE 3 --- 
    #   x = arg(0, name=x)  :: int64
    #   y = arg(1, name=y)  :: int64
    #   $0.1 = global(abs: <built-in function abs>)  :: Function(<built-in function abs>)
    #   $0.3 = call $0.1(x, func=$0.1, args=[Var(x, <ipython-input-10-8b464490a834> (3))], kws=(), vararg=None)  :: (int64,) -> int64
    #   del x
    #   del $0.1
    #   x.1 = $0.3  :: int64
    #   del $0.3

    x = abs(x)

    # --- LINE 4 --- 
    #   $0.4 = global(abs: <built-in function abs>)  :: Function(<built-in function abs>)
    #   $0.6 = call $0.4(y, func=$0.4, args=[Var(y, <ipython-input-10-8b464490a834> (3))], kws=(), vararg=None)  :: (int64,) -> int64
    #   del y
    #   del $0.4
    #   y.1 = $0.6  :: int64
    #   del $0.6

    y = abs(y)

    # --- LINE 5 --- 
    #   $0

### When things go wrong

object mode exist to enable other numba functionality

In [21]:
@jit
def cannot_compile(x):
    return x['key']

In [22]:
cannot_compile(dict(key="value"))

Compilation is falling back to object mode WITH looplifting enabled because Function "cannot_compile" failed type inference due to: non-precise type pyobject
[1] During: typing of argument at <ipython-input-21-9634e7a2d241> (3)

File "<ipython-input-21-9634e7a2d241>", line 3:
def cannot_compile(x):
    return x['key']
    ^

  @jit

File "<ipython-input-21-9634e7a2d241>", line 2:
@jit
def cannot_compile(x):
^

  self.func_ir.loc))
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "<ipython-input-21-9634e7a2d241>", line 2:
@jit
def cannot_compile(x):
^



'value'

In [23]:
@jit(nopython=True)
def cannot_compiel(x):
    return x['key']

cannot_compile(dict(key="value"))

'value'

## Exercise

In [29]:
@jit(nopython=True)
def ex1(x, y, out):
    for i in range(x.shape[0]):
        out[i] = x[i] + y[i]

In [30]:
in1 = np.arange(10, dtype=np.float64)
in2 = 2 * in1 + 1
out = np.empty_like(in1)

print("in1", in1)
print("in2", in2)

ex1(in1, in2, out)
print("out", out)

in1 [0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
in2 [ 1.  3.  5.  7.  9. 11. 13. 15. 17. 19.]
out [ 1.  4.  7. 10. 13. 16. 19. 22. 25. 28.]


my answer:

In [35]:
@jit(nopython=True)
def ex1(x, y, out):
    for i in range(x.shape[0]):
        out[i] = (x[i]**2 + y[i]**2)**0.5

In [36]:
in1 = np.arange(10, dtype=np.float64)
in2 = 2 * in1 + 1
out = np.empty_like(in1)

ex1(in1, in2, out)

np.testing.assert_almost_equal(out, np.hypot(in1, in2))

demonstrated:

In [None]:
@jit(nopython=True)
def ex1(x, y, out):
    for i in range(x.shape[0]):
        out[i] = hypot(x[i], y[i])